librarys

/ /

suppressMessages(library(ggplot2))
suppressMessages(library(ape))
suppressMessages(library(limma))
suppressMessages(library(edgeR))
suppressMessages(library(statmod))
suppressMessages(library(stats))
suppressMessages(library(dplyr))
suppressMessages(library(stringr))
suppressMessages(library(tibble))
suppressMessages(library(ggforce))
suppressMessages(library(WGCNA))
suppressMessages(library(km2gcn))
suppressMessages(library(CoExpNets))
suppressMessages(library(devtools))
suppressMessages(library(UpSetR))
suppressMessages(library(RCy3))
suppressMessages(library(readr))
suppressMessages(library(scales))
suppressMessages(library(data.table))
suppressMessages(library(forcats))
suppressMessages(library(kableExtra))

Setting up dataframes

setwd("/Users/jessicamitchell/Library/CloudStorage/OneDrive-HarvardUniversity/OD_documents/Cfixpaper")




rsem_gene_data <- read.table("epersephone_rsem_gene_count_proteinCODINGonly_2022.08.14.matrix")


s2c<-read.csv("rsem_gene_info.csv",header = TRUE, stringsAsFactors=FALSE)
s2c$sample
##  [1] "sample103"  "sample106"  "sample112"  "sample115"  "sample119" 
##  [6] "sample121"  "sample127"  "sample1294" "sample1300" "sample1310"
## [11] "sample1324" "sample1330" "sample1346" "sample1347" "sample1353"
## [16] "sample1369" "sample136"  "sample16"   "sample1750" "sample1871"
## [21] "sample1876" "sample28"   "sample36"   "sample54"   "sample56"  
## [26] "sample58"   "sample69"   "sample72"   "sample78"   "sample94"
rnaseqMatrix=round(rsem_gene_data)

filter=rowSums(cpm(rnaseqMatrix)>=1)>=3
summary(filter)
##    Mode   FALSE    TRUE 
## logical      28    3140
cpm_filtered_matrix=rnaseqMatrix[filter,]
#cpm_filtered_matrix
DGE<-DGEList(cpm_filtered_matrix)
DGE<-calcNormFactors(DGE,method =c("TMM"))
#DGE


treatvals<-s2c$condition

design_condition=model.matrix(~condition, data=s2c)

RNAseq_voom <- voomWithQualityWeights(DGE, design=design_condition,normalize.method="none")$E


###this filters out the genes that have less variation in expression
WGCNA_matrix = t(RNAseq_voom[order(apply(RNAseq_voom,1,mad), decreasing = T)[1:2500],])
WGCNA_matrix <- as.data.frame(WGCNA_matrix)

row.names(WGCNA_matrix) = s2c$sample

names(s2c)
##  [1] "sample"      "condition"   "reps"        "size"        "size_cat"   
##  [6] "troph_color" "X"           "X.1"         "X.2"         "X.3"        
## [11] "X.4"
###remove the blank columns
s2c <- s2c[,1:6]
sample_rownames <- rownames(WGCNA_matrix)

s2c2 <- s2c[, -1]
rownames(s2c2) <- sample_rownames

#removing rep and size categories
s2c3 <- s2c2[,c(1,4:5)]
names(s2c3)
## [1] "condition"   "size_cat"    "troph_color"
####making this binary data for downstream analyses

s2c4 <- s2c3 %>%
  mutate(Sulfide = case_when(
    startsWith(condition, "HS") ~ "1",
    startsWith(condition, "LS") ~ "0",
    startsWith(condition, "SW") ~ "0",
    startsWith(condition, "H2")~ "0"
    ))

s2c5 <- s2c4 %>%
  mutate(Oxygen = case_when(
    endsWith(condition, "HO") ~ "1",
    endsWith(condition, "LO") ~ "0"
    ))

s2c6 <- s2c5 %>%
  mutate(Nitrogen = case_when(
    grepl("noN",condition) ~ "0",
    grepl("_N",condition) ~ "1")
  ) 


s2c7 <- s2c6 %>%
  mutate(Hydrogen = case_when(
    startsWith(condition, "HS") ~ "0",
    startsWith(condition, "LS") ~ "0",
    startsWith(condition, "SW") ~ "0",
    startsWith(condition, "H2")~ "1"
    ))

s2c20 <- s2c7 %>%
  mutate(Low_to_No_sulfide = case_when(
    startsWith(condition, "HS") ~ "0",
    startsWith(condition, "LS") ~ "1",
    startsWith(condition, "SW") ~ "1",
    startsWith(condition, "H2")~ "0"
    ))


s2c8 <- s2c20[,4:8]

s2c8$Sulfide <- as.numeric(unlist(s2c8$Sulfide))
s2c8$Oxygen <- as.numeric(unlist(s2c8$Oxygen))
s2c8$Nitrogen <- as.numeric(unlist(s2c8$Nitrogen))
s2c8$Hydrogen <- as.numeric(unlist(s2c8$Hydrogen))
s2c8$Low_to_No_sulfide <- as.numeric(unlist(s2c8$Low_to_No_sulfide))
str(s2c8)
## 'data.frame':    30 obs. of  5 variables:
##  $ Sulfide          : num  0 0 1 1 1 1 1 1 1 1 ...
##  $ Oxygen           : num  1 1 0 0 0 1 1 1 1 1 ...
##  $ Nitrogen         : num  1 1 1 1 1 1 1 0 0 0 ...
##  $ Hydrogen         : num  1 1 0 0 0 0 0 0 0 0 ...
##  $ Low_to_No_sulfide: num  0 0 0 0 0 0 0 0 0 0 ...
s2c9 <- s2c3 %>%
  mutate(H2_N_HO = case_when(
    condition== "H2_N_HO" ~ "1",
    TRUE ~ "0"))
s210 <-   s2c9  %>%
  mutate(HS_N_LO = case_when(
    condition== "HS_N_LO" ~ "1",
    TRUE ~ "0"))

s211 <-   s210  %>%
  mutate(HS_N_HO = case_when(
    condition== "HS_N_HO" ~ "1",
    TRUE ~ "0")) 
s212 <-   s211  %>%
  mutate(HS_noN_HO = case_when(
    condition== "HS_noN_HO" ~ "1",
    TRUE ~ "0")) 
s213 <-   s212  %>%
  mutate(LS_noN_HO = case_when(
    condition== "LS_noN_HO" ~ "1",
    TRUE ~ "0")) 
s214 <-   s213  %>%
  mutate(SW_noN_HO = case_when(
    condition== "SW_noN_HO" ~ "1",
    TRUE ~ "0")) 
s215 <-   s214  %>%
  mutate(LS_N_HO = case_when(
    condition== "LS_N_HO" ~ "1",
    TRUE ~ "0"))                 
 s216 <-   s215  %>%
  mutate(H2_noN_HO = case_when(
    condition== "H2_noN_HO" ~ "1",
    TRUE ~ "0"))                
 s217 <-   s216  %>%
  mutate(H2_N_LO = case_when(
    condition== "H2_N_LO" ~ "1",
    TRUE ~ "0"))               
s218 <-   s217  %>%
  mutate(H2_noN_LO = case_when(
    condition== "H2_noN_LO" ~ "1",
    TRUE ~ "0"))               
               
names(s218) 
##  [1] "condition"   "size_cat"    "troph_color" "H2_N_HO"     "HS_N_LO"    
##  [6] "HS_N_HO"     "HS_noN_HO"   "LS_noN_HO"   "SW_noN_HO"   "LS_N_HO"    
## [11] "H2_noN_HO"   "H2_N_LO"     "H2_noN_LO"
s218$H2_N_HO <- as.numeric(unlist(s218$H2_N_HO))
s218$HS_N_LO <- as.numeric(unlist(s218$HS_N_LO))
s218$HS_N_HO <- as.numeric(unlist(s218$HS_N_HO))
s218$HS_noN_HO <- as.numeric(unlist(s218$HS_noN_HO))
s218$LS_noN_HO <- as.numeric(unlist(s218$LS_noN_HO))
s218$SW_noN_HO <- as.numeric(unlist(s218$SW_noN_HO))
s218$LS_N_HO <- as.numeric(unlist(s218$LS_N_HO))
s218$H2_noN_HO <- as.numeric(unlist(s218$H2_noN_HO))
s218$H2_N_LO <- as.numeric(unlist(s218$H2_N_LO))
s218$H2_noN_LO <- as.numeric(unlist(s218$H2_noN_LO))



s218 <- s218[,4:13]                
biconditions <- cbind(s218,s2c8)                 

save(WGCNA_matrix, biconditions, file = "count_matrix_condition_info_dataInput.RData")

/

/

softpower choice

powers = c(c(1:10), seq(from = 12, to=20, by=2))

# Call the network topology analysis function
sft2 = pickSoftThreshold(WGCNA_matrix, powerVector = powers, networkType= "signed hybrid", verbose = 2)
## pickSoftThreshold: will use block size 2500.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 2500 of 2500
## Warning: executing %dopar% sequentially: no parallel backend registered
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.0489 -0.388          0.858 331.000  308.0000 639.00
## 2      2   0.4660 -1.170          0.913 124.000  112.0000 343.00
## 3      3   0.6690 -1.540          0.944  57.600   48.3000 211.00
## 4      4   0.7060 -1.770          0.933  30.400   23.5000 140.00
## 5      5   0.7310 -1.910          0.942  17.600   12.5000  96.90
## 6      6   0.7640 -1.900          0.956  10.900    7.0200  69.60
## 7      7   0.7900 -1.900          0.968   7.060    4.0600  51.30
## 8      8   0.8070 -1.890          0.970   4.800    2.4400  38.60
## 9      9   0.8150 -1.850          0.974   3.370    1.5300  29.50
## 10    10   0.8220 -1.830          0.972   2.440    0.9920  22.90
## 11    12   0.8720 -1.640          0.985   1.380    0.4430  14.30
## 12    14   0.8920 -1.490          0.975   0.839    0.2160   9.26
## 13    16   0.9150 -1.430          0.968   0.544    0.1110   6.68
## 14    18   0.9660 -1.380          0.991   0.371    0.0579   5.22
## 15    20   0.9190 -1.490          0.949   0.264    0.0317   4.66
# Plot the results:

par(mar = c(1,2,1,2));
cex1 = 0.8;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"));
text(sft2$fitIndices[,1], -sign(sft2$fitIndices[,3])*sft2$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.8,col="red")

# Mean connectivity as a function of the soft-thresholding power
par(mar = c(1,2,1,2))
plot(sft2$fitIndices[,1], sft2$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))

text(sft2$fitIndices[,1], sft2$fitIndices[,5], labels=powers, cex=cex1,col="red")

sft2$fitIndices
##    Power   SFT.R.sq      slope truncated.R.sq     mean.k.    median.k.
## 1      1 0.04890048 -0.3884992      0.8581139 331.4011205 308.13070231
## 2      2 0.46592537 -1.1680241      0.9132379 124.4904470 112.01173850
## 3      3 0.66927892 -1.5390034      0.9440341  57.6399451  48.32562217
## 4      4 0.70560384 -1.7734087      0.9325776  30.4063264  23.49079218
## 5      5 0.73115622 -1.9082518      0.9420155  17.5623528  12.50091882
## 6      6 0.76403365 -1.9033807      0.9561525  10.8503969   7.02207374
## 7      7 0.79011799 -1.8998930      0.9678497   7.0639452   4.05989014
## 8      8 0.80724037 -1.8863799      0.9695552   4.7967210   2.44030738
## 9      9 0.81539375 -1.8468855      0.9740395   3.3725516   1.52789332
## 10    10 0.82151935 -1.8250238      0.9724534   2.4419234   0.99168615
## 11    12 0.87162088 -1.6441349      0.9852418   1.3763842   0.44250659
## 12    14 0.89171300 -1.4932610      0.9749399   0.8387154   0.21564804
## 13    16 0.91539066 -1.4287488      0.9678064   0.5438888   0.11080318
## 14    18 0.96562166 -1.3772418      0.9913790   0.3711772   0.05787538
## 15    20 0.91949827 -1.4881952      0.9494624   0.2643675   0.03166435
##        max.k.
## 1  638.846689
## 2  342.917019
## 3  210.869059
## 4  139.536080
## 5   96.870153
## 6   69.586561
## 7   51.295838
## 8   38.594403
## 9   29.529360
## 10  22.915384
## 11  14.292054
## 12   9.260525
## 13   6.684230
## 14   5.219041
## 15   4.660383
####choosing softpower to be 8
softPower = 8

checking on scale free topology

ADJ1=abs(cor(WGCNA_matrix,use="p"))^8
k=as.vector(apply(ADJ1,2,sum, na.rm=T))
sizeGrWindow(10,5)
par(mfrow=c(1,2))
hist(k)
scaleFreePlot(k, main="Check scale free topology\n")

##   scaleFreeRsquared slope
## 1              0.85 -1.93

/

/

build network (manual version)

set.seed(123)
softPower = 8;
adjacency = adjacency(WGCNA_matrix, power = softPower, type="signed hybrid")

dissTOM = TOMdist(adjacency, TOMType = "signed")
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average");
# Plot the resulting clustering tree (dendrogram)

par(mar = c(3,3,3,3))


plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity",
     labels = FALSE, hang = 0.04);

minModuleSize = 30;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,
                deepSplit = 2, pamRespectsDendro = FALSE,
                minClusterSize = minModuleSize)
##  ..cutHeight not given, setting it to 0.998  ===>  99% of the (truncated) height range in dendro.
##  ..done.
table(dynamicMods)
## dynamicMods
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
## 242 233 214 160 155 143 131 124 124 120 109 107 106 100  99  83  79  72  53  46
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
## dynamicColors
##        black         blue        brown         cyan        green  greenyellow 
##          124          214          160           99          143          107 
##         grey       grey60    lightcyan   lightgreen  lightyellow      magenta 
##          242           72           79           53           46          120 
## midnightblue         pink       purple          red       salmon          tan 
##           83          124          109          131          100          106 
##    turquoise       yellow 
##          233          155
####changing names of colors

colorOrder = c("grey", standardColors(50))
list(colorOrder)
## [[1]]
##  [1] "grey"            "turquoise"       "blue"            "brown"          
##  [5] "yellow"          "green"           "red"             "black"          
##  [9] "pink"            "magenta"         "purple"          "greenyellow"    
## [13] "tan"             "salmon"          "cyan"            "midnightblue"   
## [17] "lightcyan"       "grey60"          "lightgreen"      "lightyellow"    
## [21] "royalblue"       "darkred"         "darkgreen"       "darkturquoise"  
## [25] "darkgrey"        "orange"          "darkorange"      "white"          
## [29] "skyblue"         "saddlebrown"     "steelblue"       "paleturquoise"  
## [33] "violet"          "darkolivegreen"  "darkmagenta"     "sienna3"        
## [37] "yellowgreen"     "skyblue3"        "plum1"           "orangered4"     
## [41] "mediumpurple3"   "lightsteelblue1" "lightcyan1"      "ivory"          
## [45] "floralwhite"     "darkorange2"     "brown4"          "bisque4"        
## [49] "darkslateblue"   "plum2"           "thistle2"
moduleLabels = match(dynamicColors, colorOrder)-1
head(moduleLabels)
## [1] 10 13 15 17 17 17
table(moduleLabels)
## moduleLabels
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19 
## 242 233 214 160 155 143 131 124 124 120 109 107 106 100  99  83  79  72  53  46
table(dynamicColors) 
## dynamicColors
##        black         blue        brown         cyan        green  greenyellow 
##          124          214          160           99          143          107 
##         grey       grey60    lightcyan   lightgreen  lightyellow      magenta 
##          242           72           79           53           46          120 
## midnightblue         pink       purple          red       salmon          tan 
##           83          124          109          131          100          106 
##    turquoise       yellow 
##          233          155
customColorOrder = c("grey", "brown", "deeppink3", "darkcyan","plum","darkgreen","red","black","pink","skyblue","plum4","greenyellow","tan","darkorchid4","darkgoldenrod1","midnightblue","lightcyan","grey60","lightgreen","yellow")






moduleColors.custom = customColorOrder[moduleLabels + 1]
table(dynamicColors, moduleColors.custom)##sanity check
##               moduleColors.custom
## dynamicColors  black brown darkcyan darkgoldenrod1 darkgreen darkorchid4
##   black          124     0        0              0         0           0
##   blue             0     0        0              0         0           0
##   brown            0     0      160              0         0           0
##   cyan             0     0        0             99         0           0
##   green            0     0        0              0       143           0
##   greenyellow      0     0        0              0         0           0
##   grey             0     0        0              0         0           0
##   grey60           0     0        0              0         0           0
##   lightcyan        0     0        0              0         0           0
##   lightgreen       0     0        0              0         0           0
##   lightyellow      0     0        0              0         0           0
##   magenta          0     0        0              0         0           0
##   midnightblue     0     0        0              0         0           0
##   pink             0     0        0              0         0           0
##   purple           0     0        0              0         0           0
##   red              0     0        0              0         0           0
##   salmon           0     0        0              0         0         100
##   tan              0     0        0              0         0           0
##   turquoise        0   233        0              0         0           0
##   yellow           0     0        0              0         0           0
##               moduleColors.custom
## dynamicColors  deeppink3 greenyellow grey grey60 lightcyan lightgreen
##   black                0           0    0      0         0          0
##   blue               214           0    0      0         0          0
##   brown                0           0    0      0         0          0
##   cyan                 0           0    0      0         0          0
##   green                0           0    0      0         0          0
##   greenyellow          0         107    0      0         0          0
##   grey                 0           0  242      0         0          0
##   grey60               0           0    0     72         0          0
##   lightcyan            0           0    0      0        79          0
##   lightgreen           0           0    0      0         0         53
##   lightyellow          0           0    0      0         0          0
##   magenta              0           0    0      0         0          0
##   midnightblue         0           0    0      0         0          0
##   pink                 0           0    0      0         0          0
##   purple               0           0    0      0         0          0
##   red                  0           0    0      0         0          0
##   salmon               0           0    0      0         0          0
##   tan                  0           0    0      0         0          0
##   turquoise            0           0    0      0         0          0
##   yellow               0           0    0      0         0          0
##               moduleColors.custom
## dynamicColors  midnightblue pink plum plum4 red skyblue tan yellow
##   black                   0    0    0     0   0       0   0      0
##   blue                    0    0    0     0   0       0   0      0
##   brown                   0    0    0     0   0       0   0      0
##   cyan                    0    0    0     0   0       0   0      0
##   green                   0    0    0     0   0       0   0      0
##   greenyellow             0    0    0     0   0       0   0      0
##   grey                    0    0    0     0   0       0   0      0
##   grey60                  0    0    0     0   0       0   0      0
##   lightcyan               0    0    0     0   0       0   0      0
##   lightgreen              0    0    0     0   0       0   0      0
##   lightyellow             0    0    0     0   0       0   0     46
##   magenta                 0    0    0     0   0     120   0      0
##   midnightblue           83    0    0     0   0       0   0      0
##   pink                    0  124    0     0   0       0   0      0
##   purple                  0    0    0   109   0       0   0      0
##   red                     0    0    0     0 131       0   0      0
##   salmon                  0    0    0     0   0       0   0      0
##   tan                     0    0    0     0   0       0 106      0
##   turquoise               0    0    0     0   0       0   0      0
##   yellow                  0    0  155     0   0       0   0      0
str(dynamicColors)
##  chr [1:2500] "purple" "salmon" "midnightblue" "grey60" "grey60" "grey60" ...
str(moduleColors.custom)
##  chr [1:2500] "plum4" "darkorchid4" "midnightblue" "grey60" "grey60" ...
plotDendroAndColors(geneTree, moduleColors.custom, "Dynamic Tree Cut",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05,
                    main = "Gene dendrogram and module colors")

# Calculate eigengenes
MEList1 = moduleEigengenes(WGCNA_matrix, colors = moduleColors.custom)
MEs1 = MEList1$eigengenes
 #Calculate dissimilarity of module eigengenes
MEDiss1 = 1-cor(MEs1);
# Cluster module eigengenes
METree1 = hclust(as.dist(MEDiss1), method = "average")
# Plot the result


par(mar = c(3,3,3,3))
plot(METree1, main = "Clustering of module eigengenes",
     xlab = "", sub = "")
MEDissThres = 0.3
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")

# Call an automatic merging function
merge = mergeCloseModules(WGCNA_matrix, moduleColors.custom, cutHeight = MEDissThres, verbose = 3)
##  mergeCloseModules: Merging modules whose distance is less than 0.3
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 20 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 16 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 15 module eigengenes in given set.
##    Calculating new MEs...
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 15 module eigengenes in given set.
#Eigengenes of the new merged modules with new colors
mergedMEs = merge$newMEs;
mergedColors = merge$colors
table(moduleColors.custom)
## moduleColors.custom
##          black          brown       darkcyan darkgoldenrod1      darkgreen 
##            124            233            160             99            143 
##    darkorchid4      deeppink3    greenyellow           grey         grey60 
##            100            214            107            242             72 
##      lightcyan     lightgreen   midnightblue           pink           plum 
##             79             53             83            124            155 
##          plum4            red        skyblue            tan         yellow 
##            109            131            120            106             46
table(mergedColors)
## mergedColors
##          black          brown       darkcyan darkgoldenrod1      darkgreen 
##            124            233            239             99            143 
##    darkorchid4      deeppink3    greenyellow           grey         grey60 
##            100            214            160            242            203 
##   midnightblue           pink          plum4        skyblue         yellow 
##             83            385            109            120             46
plotDendroAndColors(geneTree, cbind(moduleColors.custom, mergedColors),
                    c("Dynamic Tree Cut", "Merged dynamic"),
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05)

nGenes = ncol(WGCNA_matrix)
nSamples = nrow(WGCNA_matrix)
modNames = substring(names(mergedMEs), 3)

MEs0 = moduleEigengenes(WGCNA_matrix, mergedColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, biconditions, use = "p")
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)

save(MEs, mergedColors, geneTree,moduleTraitCor,moduleTraitPvalue,
file = "wgcna_manual_network20220910.RData")
lnames = load(file = "wgcna_manual_network20220910.RData")
lnames = load(file= "count_matrix_condition_info_dataInput.RData")






# Convert traits to a color representation: white means low, red means high, grey means missing entry


par(mar = c(1,1,2,2))
par(mfrow=c(1,1))
sampleTree = hclust(dist(WGCNA_matrix), method = "average")

names(biconditions)
##  [1] "H2_N_HO"           "HS_N_LO"           "HS_N_HO"          
##  [4] "HS_noN_HO"         "LS_noN_HO"         "SW_noN_HO"        
##  [7] "LS_N_HO"           "H2_noN_HO"         "H2_N_LO"          
## [10] "H2_noN_LO"         "Sulfide"           "Oxygen"           
## [13] "Nitrogen"          "Hydrogen"          "Low_to_No_sulfide"
biconditions <- biconditions[,1:14]

desired_order <- c("HS_N_HO" , "HS_noN_HO", "HS_N_LO","LS_N_HO","LS_noN_HO","SW_noN_HO","H2_N_HO","H2_noN_HO","H2_N_LO","H2_noN_LO","Sulfide","Oxygen","Nitrogen","Hydrogen") 

biconditions <- biconditions[,match(desired_order,colnames(biconditions))]



traitColors = numbers2colors(biconditions, signed = TRUE);
# Plot the sample dendrogram and the colors underneath.
plotDendroAndColors(sampleTree, traitColors,
                    groupLabels = names(biconditions),
                    main = "Sample dendrogram and trait heatmap")

nGenes = ncol(WGCNA_matrix)
nSamples = nrow(WGCNA_matrix)




moduleColors = mergedColors
MEs0 = moduleEigengenes(WGCNA_matrix, moduleColors)$eigengenes
MEs = orderMEs(MEs0)

##reordering MEs to match figure 
mycolororder <- c("MEmidnightblue","MEdarkorchid4","MEyellow","MEgreenyellow", "MEblack", "MEdarkgreen","MEdeeppink3","MEpink","MEgrey60","MEbrown","MEplum4","MEskyblue","MEdarkcyan","MEdarkgoldenrod1","MEgrey")
MEs <- MEs[,match(mycolororder,colnames(MEs))]

biconditions <- biconditions[,match(desired_order,colnames(biconditions))]


moduleTraitCor = cor(MEs, biconditions, use = "p")

moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)

summary(moduleTraitPvalue)
##     HS_N_HO          HS_noN_HO          HS_N_LO            LS_N_HO      
##  Min.   :0.01196   Min.   :0.00000   Min.   :0.003134   Min.   :0.2678  
##  1st Qu.:0.03387   1st Qu.:0.03521   1st Qu.:0.137401   1st Qu.:0.4198  
##  Median :0.14691   Median :0.27026   Median :0.344828   Median :0.5388  
##  Mean   :0.26185   Mean   :0.32608   Mean   :0.396634   Mean   :0.5700  
##  3rd Qu.:0.32153   3rd Qu.:0.57388   3rd Qu.:0.671187   3rd Qu.:0.6861  
##  Max.   :0.91450   Max.   :0.93636   Max.   :0.970864   Max.   :0.9718  
##    LS_noN_HO           SW_noN_HO            H2_N_HO          H2_noN_HO      
##  Min.   :0.0000558   Min.   :0.0008013   Min.   :0.01812   Min.   :0.03995  
##  1st Qu.:0.1967968   1st Qu.:0.2808627   1st Qu.:0.29340   1st Qu.:0.13709  
##  Median :0.4090254   Median :0.3534417   Median :0.51191   Median :0.21959  
##  Mean   :0.4217359   Mean   :0.4078278   Mean   :0.49212   Mean   :0.33175  
##  3rd Qu.:0.6354574   3rd Qu.:0.6356339   3rd Qu.:0.69145   3rd Qu.:0.50608  
##  Max.   :0.8682022   Max.   :0.7872686   Max.   :0.98003   Max.   :0.78614  
##     H2_N_LO           H2_noN_LO          Sulfide             Oxygen         
##  Min.   :0.001603   Min.   :0.05381   Min.   :0.001234   Min.   :0.0000002  
##  1st Qu.:0.060706   1st Qu.:0.10483   1st Qu.:0.067933   1st Qu.:0.0007034  
##  Median :0.145126   Median :0.18447   Median :0.174750   Median :0.0103513  
##  Mean   :0.239831   Mean   :0.27926   Mean   :0.366110   Mean   :0.0822130  
##  3rd Qu.:0.435491   3rd Qu.:0.36867   3rd Qu.:0.667126   3rd Qu.:0.0620483  
##  Max.   :0.716300   Max.   :0.77636   Max.   :0.970160   Max.   :0.5030011  
##     Nitrogen           Hydrogen       
##  Min.   :0.002802   Min.   :0.000174  
##  1st Qu.:0.143535   1st Qu.:0.034898  
##  Median :0.314161   Median :0.068381  
##  Mean   :0.334997   Mean   :0.203924  
##  3rd Qu.:0.555353   3rd Qu.:0.219406  
##  Max.   :0.841863   Max.   :0.900770
textMatrix =  paste(signif(moduleTraitCor, 2), "\n(",
                           signif(moduleTraitPvalue, 1), ")", sep = "")








dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(3,4, 1,1))

names(biconditions)
##  [1] "HS_N_HO"   "HS_noN_HO" "HS_N_LO"   "LS_N_HO"   "LS_noN_HO" "SW_noN_HO"
##  [7] "H2_N_HO"   "H2_noN_HO" "H2_N_LO"   "H2_noN_LO" "Sulfide"   "Oxygen"   
## [13] "Nitrogen"  "Hydrogen"
head(moduleTraitCor)
##                   HS_N_HO  HS_noN_HO    HS_N_LO      LS_N_HO  LS_noN_HO
## MEmidnightblue 0.02046914 0.77549274 -0.1012675 -0.006751255 0.24346567
## MEdarkorchid4  0.39363790 0.20790779 -0.3311569  0.208972393 0.24138458
## MEyellow       0.04160808 0.06616182 -0.5017755  0.093907529 0.06104898
## MEgreenyellow  0.22355044 0.08593010 -0.1231448  0.134486463 0.16362566
## MEblack        0.16895507 0.30463652 -0.5213307  0.137603215 0.15939483
## MEdarkgreen    0.29051730 0.14258634 -0.4439989  0.165647455 0.14956928
##                  SW_noN_HO      H2_N_HO   H2_noN_HO    H2_N_LO  H2_noN_LO
## MEmidnightblue  0.10228132 -0.326359663 -0.07370678 -0.3379315 -0.2956921
## MEdarkorchid4   0.06986956 -0.214684994  0.29526963 -0.5156230 -0.3555769
## MEyellow        0.18334438  0.064921651  0.15299999  0.1231695 -0.2853864
## MEgreenyellow   0.07837730 -0.428591372  0.23740423 -0.2124148 -0.1592232
## MEblack         0.17555561 -0.124569788  0.24385690 -0.3445732 -0.1995284
## MEdarkgreen    -0.05142174 -0.004771732  0.23089975 -0.2973662 -0.1816616
##                     Sulfide    Oxygen   Nitrogen    Hydrogen
## MEmidnightblue  0.454784198 0.4810992 -0.4511045 -0.63300331
## MEdarkorchid4   0.177011014 0.7871273 -0.2753128 -0.48415103
## MEyellow       -0.257937228 0.4346851 -0.1069013  0.03411204
## MEgreenyellow   0.121985353 0.3239114 -0.2436685 -0.34465859
## MEblack        -0.031252607 0.6974892 -0.4103493 -0.26014469
## MEdarkgreen    -0.007132597 0.6042628 -0.1739832 -0.15486885
# Display the correlation values within a heatmap plot
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = names(biconditions),
               yLabels = names(MEs),
               ySymbols = names(MEs),
               colorLabels = FALSE,
               colors = blueWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE,
               cex.text = 0.5,
               cex.lab.x =0.4,
               cex.lab.y=0.4,
               zlim = c(-1,1),
               main = paste("Module-trait relationships"))

MM and GS calc

nGenes = ncol(WGCNA_matrix)
nSamples = nrow(WGCNA_matrix)

names(biconditions)
##  [1] "HS_N_HO"   "HS_noN_HO" "HS_N_LO"   "LS_N_HO"   "LS_noN_HO" "SW_noN_HO"
##  [7] "H2_N_HO"   "H2_noN_HO" "H2_N_LO"   "H2_noN_LO" "Sulfide"   "Oxygen"   
## [13] "Nitrogen"  "Hydrogen"
Sulfide = as.data.frame(biconditions$Sulfide);
names(Sulfide)[1] <- "Sulfide"
Oxygen <- as.data.frame(biconditions$Oxygen)
names(Oxygen)[1] <- "Oxygen"
Nitrogen <- as.data.frame(biconditions$Nitrogen)
names(Nitrogen)[1] <- "Nitrogen"
Hydrogen <- as.data.frame(biconditions$Hydrogen)
names(Hydrogen)[1] <- "Hydrogen"
H2_N_HO <- as.data.frame(biconditions$H2_N_HO)
names(H2_N_HO)[1] <- "H2_N_HO"
HS_N_LO <- as.data.frame(biconditions$HS_N_LO)
names(HS_N_LO)[1] <- "HS_N_LO"
HS_N_HO <- as.data.frame(biconditions$HS_N_HO)
names(HS_N_HO)[1] <- "HS_N_HO"
HS_noN_HO <- as.data.frame(biconditions$HS_noN_HO)
names(HS_noN_HO)[1] <- "HS_noN_HO"
LS_noN_HO <- as.data.frame(biconditions$LS_noN_HO)
names(LS_noN_HO)[1] <- "LS_noN_HO"
SW_noN_HO <- as.data.frame(biconditions$SW_noN_HO)
names(SW_noN_HO)[1] <- "SW_noN_HO"
LS_N_HO <- as.data.frame(biconditions$LS_N_HO)
names(LS_N_HO)[1] <- "LS_N_HO"
H2_noN_HO <- as.data.frame(biconditions$H2_noN_HO)
names(H2_noN_HO)[1] <- "H2_noN_HO"
H2_N_LO <- as.data.frame(biconditions$H2_N_LO)
names(H2_N_LO)[1] <- "H2_N_LO"
H2_noN_LO <- as.data.frame(biconditions$H2_noN_LO)
names(H2_noN_LO)[1] <- "H2_noN_LO"

modNames = substring(names(MEs), 3)


geneModuleMembership = as.data.frame(cor(WGCNA_matrix, MEs, use = "p"));
MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
names(geneModuleMembership) = paste("MM", modNames, sep="");
names(MMPvalue) = paste("p.MM", modNames, sep="");


geneSignificance=as.data.frame(cor(WGCNA_matrix, biconditions, use = "p"))
GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSignificance), nSamples))
names(geneSignificance) = paste("GS.", names(biconditions), sep="");
names(GSPvalue) = paste("p.Sul.", names(biconditions), sep="");

str(geneModuleMembership)
## 'data.frame':    2500 obs. of  15 variables:
##  $ MMmidnightblue  : num  -0.298 0.648 0.768 0.625 0.599 ...
##  $ MMdarkorchid4   : num  -0.474 0.848 0.291 0.445 0.409 ...
##  $ MMyellow        : num  -0.0623 0.163 -0.2172 0.211 -0.0863 ...
##  $ MMgreenyellow   : num  -0.1228 0.3899 0.0313 0.0184 -0.2098 ...
##  $ MMblack         : num  -0.1697 0.5395 0.0647 0.4343 0.2309 ...
##  $ MMdarkgreen     : num  -0.0548 0.4469 -0.0929 0.2286 -0.0154 ...
##  $ MMdeeppink3     : num  -0.0172 0.4138 0.0872 0.1792 0.0883 ...
##  $ MMpink          : num  -0.016 0.5972 0.1198 0.1043 -0.0477 ...
##  $ MMgrey60        : num  -0.656 0.183 0.255 0.474 0.511 ...
##  $ MMbrown         : num  -0.138 -0.295 0.237 0.274 0.475 ...
##  $ MMplum4         : num  0.648 -0.599 -0.107 -0.237 -0.344 ...
##  $ MMskyblue       : num  0.247 -0.286 -0.58 -0.626 -0.677 ...
##  $ MMdarkcyan      : num  0.256 -0.84 -0.471 -0.44 -0.391 ...
##  $ MMdarkgoldenrod1: num  -0.173 -0.442 -0.579 -0.381 -0.328 ...
##  $ MMgrey          : num  -0.3438 0.3343 -0.1019 0.0722 0.278 ...
str(geneSignificance)
## 'data.frame':    2500 obs. of  14 variables:
##  $ GS.HS_N_HO  : num  -0.119 0.376 -0.219 -0.477 -0.338 ...
##  $ GS.HS_noN_HO: num  0.134 0.419 0.675 0.431 0.299 ...
##  $ GS.HS_N_LO  : num  0.1614 -0.3814 0.0652 -0.2549 -0.0556 ...
##  $ GS.LS_N_HO  : num  0.0746 0.1922 0.1029 0.0454 -0.0436 ...
##  $ GS.LS_noN_HO: num  -0.567 0.24 0.114 0.362 0.321 ...
##  $ GS.SW_noN_HO: num  -0.1715 -0.1254 0.0558 0.1978 0.2951 ...
##  $ GS.H2_N_HO  : num  0.1176 -0.0198 -0.3035 0.0256 -0.0918 ...
##  $ GS.H2_noN_HO: num  -0.1839 0.0707 -0.1085 0.2563 0.2329 ...
##  $ GS.H2_N_LO  : num  0.196 -0.365 -0.251 -0.358 -0.285 ...
##  $ GS.H2_noN_LO: num  0.357 -0.406 -0.131 -0.229 -0.334 ...
##  $ GS.Sulfide  : num  0.1156 0.2704 0.3413 -0.1966 -0.0622 ...
##  $ GS.Oxygen   : num  -0.468 0.754 0.208 0.551 0.442 ...
##  $ GS.Nitrogen : num  0.259 -0.119 -0.364 -0.611 -0.489 ...
##  $ GS.Hydrogen : num  0.298 -0.441 -0.486 -0.187 -0.293 ...
geneSulfideSignificance=as.data.frame(cor(WGCNA_matrix, Sulfide, use = "p"))
SulPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSulfideSignificance), nSamples))
names(geneSulfideSignificance) = paste("Sul.", names(Sulfide), sep="");
names(SulPvalue) = paste("p.Sul.", names(Sulfide), sep="");


geneOxygenSignificance=as.data.frame(cor(WGCNA_matrix, Oxygen, use = "p"))
OxyPvalue = as.data.frame(corPvalueStudent(as.matrix(geneOxygenSignificance), nSamples))
names(geneOxygenSignificance) = paste("Oxy.", names(Oxygen), sep="");
names(OxyPvalue) = paste("p.Oxy.", names(Oxygen), sep="");


geneNitrogenSignificance=as.data.frame(cor(WGCNA_matrix, Nitrogen, use = "p"))
NitPvalue = as.data.frame(corPvalueStudent(as.matrix(geneNitrogenSignificance), nSamples))
names(geneNitrogenSignificance) = paste("Nit.", names(Nitrogen), sep="");
names(NitPvalue) = paste("p.Nit.", names(Nitrogen), sep="");



geneHydrogenSignificance=as.data.frame(cor(WGCNA_matrix, Hydrogen, use = "p"))
HydPvalue = as.data.frame(corPvalueStudent(as.matrix(geneHydrogenSignificance), nSamples))
names(geneHydrogenSignificance) = paste("Hyd.", names(Hydrogen), sep="");
names(HydPvalue) = paste("p.Hyd.", names(Hydrogen), sep="");





####making dataframe to add to cytoscape layer#########################################
sulfsig <- geneSulfideSignificance
sulfsig$locusID <- rownames(sulfsig)
rownames(sulfsig) <- NULL
sulfsig <- sulfsig[,c(2,1)]

oxysig <- geneOxygenSignificance
oxysig$locusID <- rownames(oxysig)
rownames(oxysig) <- NULL
oxysig <- oxysig[,c(2,1)]

nitrosig <- geneNitrogenSignificance
nitrosig$locusID <- rownames(nitrosig)
rownames(nitrosig) <- NULL
nitrosig <- nitrosig[,c(2,1)]


hydrosig <- geneHydrogenSignificance
hydrosig$locusID <- rownames(hydrosig)
rownames(hydrosig) <- NULL
hydrosig <- hydrosig[,c(2,1)]


sig.genes <- cbind(sulfsig,oxysig$Oxy.Oxygen,nitrosig$Nit.Nitrogen,hydrosig$Hyd.Hydrogen)
names(sig.genes)[2] <- "Sulfide_GS"
names(sig.genes)[3] <- "Oxygen_GS"
names(sig.genes)[4] <- "Nitrogen_GS"
names(sig.genes)[5] <- "Hydrogen_GS"
names(sig.genes)
## [1] "locusID"     "Sulfide_GS"  "Oxygen_GS"   "Nitrogen_GS" "Hydrogen_GS"
#write.csv(sig.genes,"sig.genes.csv")



#########################making datframe to oxygen and sulfide dataframe########################################################

sig.genes3 <- cbind(sulfsig,oxysig$Oxy.Oxygen)
colnames(sig.genes3)
## [1] "locusID"           "Sul.Sulfide"       "oxysig$Oxy.Oxygen"
colnames(sig.genes3) <- c("locusID","corSulfide","corOxygen")

sulP <- SulPvalue
sulP <- tibble(locusID = rownames(sulP), sulP)
oxyP <- OxyPvalue
oxyP <- tibble(locusID = rownames(oxyP), oxyP)

colnames(sulP)
## [1] "locusID"       "p.Sul.Sulfide"
colnames(sulP) <- c("locusID","pval_Sulfide")

colnames(oxyP)
## [1] "locusID"      "p.Oxy.Oxygen"
colnames(oxyP) <- c("locusID","pval_Oxygen")




GMM <- geneModuleMembership

GMMP <- MMPvalue

head(sig.genes3)
##              locusID  corSulfide  corOxygen
## 1 gene-L0Y14_RS04070  0.11559790 -0.4677785
## 2 gene-L0Y14_RS09775  0.27041835  0.7541553
## 3 gene-L0Y14_RS01990  0.34126255  0.2075954
## 4 gene-L0Y14_RS09950 -0.19662226  0.5510156
## 5 gene-L0Y14_RS09955 -0.06219137  0.4415475
## 6 gene-L0Y14_RS12365 -0.23047838  0.2361504
head(GMM)
##                    MMmidnightblue MMdarkorchid4    MMyellow MMgreenyellow
## gene-L0Y14_RS04070     -0.2978002    -0.4742295 -0.06228793   -0.12283434
## gene-L0Y14_RS09775      0.6475847     0.8479211  0.16301606    0.38986765
## gene-L0Y14_RS01990      0.7683016     0.2909617 -0.21720940    0.03127418
## gene-L0Y14_RS09950      0.6250107     0.4452151  0.21104415    0.01837192
## gene-L0Y14_RS09955      0.5993774     0.4088890 -0.08634607   -0.20976909
## gene-L0Y14_RS12365      0.3632049     0.1311813 -0.06645301   -0.38765511
##                        MMblack MMdarkgreen MMdeeppink3      MMpink   MMgrey60
## gene-L0Y14_RS04070 -0.16971124 -0.05481641 -0.01719957 -0.01598375 -0.6563363
## gene-L0Y14_RS09775  0.53953441  0.44692677  0.41381637  0.59720934  0.1830737
## gene-L0Y14_RS01990  0.06471391 -0.09289025  0.08717597  0.11976742  0.2550905
## gene-L0Y14_RS09950  0.43427234  0.22863891  0.17923724  0.10431595  0.4740972
## gene-L0Y14_RS09955  0.23088121 -0.01544034  0.08834925 -0.04765631  0.5107174
## gene-L0Y14_RS12365 -0.00202547 -0.12064102 -0.25455458 -0.31551755  0.7070535
##                       MMbrown    MMplum4  MMskyblue  MMdarkcyan
## gene-L0Y14_RS04070 -0.1384809  0.6483345  0.2465064  0.25589117
## gene-L0Y14_RS09775 -0.2948900 -0.5986951 -0.2859013 -0.84047590
## gene-L0Y14_RS01990  0.2365179 -0.1068451 -0.5795192 -0.47056380
## gene-L0Y14_RS09950  0.2737269 -0.2366527 -0.6256319 -0.43994105
## gene-L0Y14_RS09955  0.4747395 -0.3438988 -0.6771603 -0.39111707
## gene-L0Y14_RS12365  0.4675125 -0.2198442 -0.5555037 -0.09885019
##                    MMdarkgoldenrod1      MMgrey
## gene-L0Y14_RS04070       -0.1725039 -0.34377603
## gene-L0Y14_RS09775       -0.4423334  0.33425164
## gene-L0Y14_RS01990       -0.5786492 -0.10188510
## gene-L0Y14_RS09950       -0.3808909  0.07218213
## gene-L0Y14_RS09955       -0.3278772  0.27804497
## gene-L0Y14_RS12365       -0.2081241  0.06962918
head(GMMP)
##                    p.MMmidnightblue p.MMdarkorchid4 p.MMyellow p.MMgreenyellow
## gene-L0Y14_RS04070     1.099727e-01    8.107296e-03  0.7436784      0.51785242
## gene-L0Y14_RS09775     1.095424e-04    3.335534e-09  0.3893952      0.03319238
## gene-L0Y14_RS01990     7.146697e-07    1.187803e-01  0.2489174      0.86968629
## gene-L0Y14_RS09950     2.220815e-04    1.368570e-02  0.2629330      0.92323522
## gene-L0Y14_RS09955     4.649887e-04    2.486376e-02  0.6500519      0.26589457
## gene-L0Y14_RS12365     4.852036e-02    4.895817e-01  0.7271658      0.03429251
##                      p.MMblack p.MMdarkgreen p.MMdeeppink3     p.MMpink
## gene-L0Y14_RS04070 0.369944508    0.77357610    0.92812078 0.9331901555
## gene-L0Y14_RS09775 0.002091438    0.01328534    0.02301156 0.0004935721
## gene-L0Y14_RS01990 0.734046428    0.62539413    0.64690412 0.5284364628
## gene-L0Y14_RS09950 0.016490067    0.22426245    0.34327557 0.5832907609
## gene-L0Y14_RS09955 0.219626475    0.93545678    0.64246404 0.8025293779
## gene-L0Y14_RS12365 0.991524596    0.52541108    0.17462403 0.0894278762
##                      p.MMgrey60   p.MMbrown    p.MMplum4  p.MMskyblue
## gene-L0Y14_RS04070 8.200365e-05 0.465519128 0.0001068964 1.891247e-01
## gene-L0Y14_RS09775 3.328721e-01 0.113659304 0.0004738236 1.256266e-01
## gene-L0Y14_RS01990 1.736871e-01 0.208261116 0.5741429330 7.907558e-04
## gene-L0Y14_RS09950 8.127494e-03 0.143281237 0.2079944552 2.179639e-04
## gene-L0Y14_RS09955 3.928636e-03 0.008029824 0.0627743027 3.961873e-05
## gene-L0Y14_RS12365 1.253228e-05 0.009188101 0.2430811504 1.438928e-03
##                    p.MMdarkcyan p.MMdarkgoldenrod1   p.MMgrey
## gene-L0Y14_RS04070 1.722940e-01       0.3620032361 0.06287441
## gene-L0Y14_RS09775 6.204607e-09       0.0143824230 0.07102686
## gene-L0Y14_RS01990 8.682956e-03       0.0008087425 0.59214321
## gene-L0Y14_RS09950 1.498308e-02       0.0378395675 0.70464811
## gene-L0Y14_RS09955 3.258386e-02       0.0769249135 0.13682488
## gene-L0Y14_RS12365 6.032765e-01       0.2697473140 0.71465331
dim(sig.genes3)
## [1] 2500    3
####before I can merge these I have to change the rownames to columns



# Assuming your datasets are named df1, df2, df3
# Convert row names to column for df2 and df3
GMM <- tibble(locusID = rownames(GMM), GMM)
GMMP <- tibble(locusID = rownames(GMMP), GMMP)



colnames(GMM) <- c("locusID", "GM_MEpurple","GM_MEmidnightblue", "GM_MElightyellow","GM_MEgreenyellow","GM_MEblack","GM_MEgreen","GM_MEcherry","GM_MEpink","GM_MEbrown","GM_MEgrey60", "GM_MElightpurple","GM_MEskyblue","GM_MEteal","GM_MEgold","GM_MEgrey")




colnames(GMMP) <- c("locusID", "GMPval_MEpurple","GMPval_MEmidnightblue", "GMPval_MElightyellow","GMPval_MEgreenyellow","GMPval_MEblack","GMPval_MEgreen","GMPval_MEcherry","GMPval_MEpink","GMPval_MEbrown","GMPval_MEgrey60", "GMPval_MElightpurple","GMPval_MEskyblue","GMPval_MEteal","GMPval_MEgold","GMPval_MEgrey")


merged_df <- merge(sig.genes3, sulP, by = "locusID", all.x = TRUE)
merged_df2 <- merge(merged_df, oxyP, by = "locusID", all.x = TRUE)

merged_df3 <- merge(merged_df2, GMM, by = "locusID", all.x = TRUE)
merged_df4 <- merge(merged_df3, GMMP, by = "locusID", all.x = TRUE)

#write.csv(merged_df4,"GM_GS_sulfide_oxygen.csv")

##################################################################################################################

scatter plots

names(modNames)
## NULL
moduleColors = mergedColors
table(moduleColors)
## moduleColors
##          black          brown       darkcyan darkgoldenrod1      darkgreen 
##            124            233            239             99            143 
##    darkorchid4      deeppink3    greenyellow           grey         grey60 
##            100            214            160            242            203 
##   midnightblue           pink          plum4        skyblue         yellow 
##             83            385            109            120             46
module = "midnightblue"
column = match(module, modNames);
moduleGenes = moduleColors==module;

module2 = "brown"
column2 = match(module2, modNames);
moduleGenes2 = moduleColors==module2;

module3 = "darkgoldenrod1"
column3 = match(module3, modNames);
moduleGenes3 = moduleColors==module3;


module4 = "pink"
column4 = match(module4, modNames);
moduleGenes4 = moduleColors==module4;

module5 = "black"
column5 = match(module5, modNames);
moduleGenes5 = moduleColors==module5;

module6 = "deeppink3"
column6 = match(module6, modNames);
moduleGenes6 = moduleColors==module6;


module7 = "darkcyan"
column7 = match(module7, modNames);
moduleGenes7 = moduleColors==module7;

module8 = "darkgreen"
column8 = match(module8, modNames);
moduleGenes8 = moduleColors==module8;


module9 = "greenyellow"
column9 = match(module9, modNames);
moduleGenes9 = moduleColors==module9;


module10 = "grey60"
column10 = match(module10, modNames);
moduleGenes10 = moduleColors==module10;

module11 = "yellow"
column11 = match(module11, modNames);
moduleGenes11 = moduleColors==module11;

module12 = "skyblue"
column12 = match(module12, modNames);
moduleGenes12 = moduleColors==module12;


module13 = "plum4"
column13 = match(module13, modNames);
moduleGenes13 = moduleColors==module13;


module14 = "darkorchid4"
column14 = match(module14, modNames);
moduleGenes14 = moduleColors==module14;

module15 = "grey"
column15 = match(module15, modNames);
moduleGenes15 = moduleColors==module15;


#sizeGrWindow(7, 7);  # uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))

 #midnight blue and brown       
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                   abs(geneSulfideSignificance[moduleGenes, 1]),
                   xlab = paste("Module Membership in", module, "module"),
                   ylab = "Sulfide",
                    main = paste("Module membership vs. gene significance\n"),
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                   abs(geneOxygenSignificance[moduleGenes, 1]),
                   xlab = paste("Module Membership in", module, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                   abs(geneHydrogenSignificance[moduleGenes, 1]),
                   xlab = paste("Module Membership in", module, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
                   abs(geneNitrogenSignificance[moduleGenes, 1]),
                   xlab = paste("Module Membership in", module, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
                   abs(geneSulfideSignificance[moduleGenes2, 1]),
                   xlab = paste("Module Membership in", module2, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
                   abs(geneOxygenSignificance[moduleGenes2, 1]),
                   xlab = paste("Module Membership in", module2, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
                   abs(geneHydrogenSignificance[moduleGenes2, 1]),
                   xlab = paste("Module Membership in", module2, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes2, column2]),
                   abs(geneNitrogenSignificance[moduleGenes2, 1]),
                   xlab = paste("Module Membership in", module2, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module2,
                    abline=TRUE)

#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))              

####gold and pink
verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
                   abs(geneSulfideSignificance[moduleGenes3, 1]),
                   xlab = paste("Module Membership in", module3, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
                   abs(geneOxygenSignificance[moduleGenes3, 1]),
                   xlab = paste("Module Membership in", module3, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
                   abs(geneHydrogenSignificance[moduleGenes3, 1]),
                   xlab = paste("Module Membership in", module3, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
                   abs(geneNitrogenSignificance[moduleGenes3, 1]),
                   xlab = paste("Module Membership in", module3, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
                  abline=TRUE)
#pink
verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
                   abs(geneSulfideSignificance[moduleGenes4, 1]),
                   xlab = paste("Module Membership in", module4, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
                   abs(geneOxygenSignificance[moduleGenes4, 1]),
                   xlab = paste("Module Membership in", module4, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
                   abs(geneHydrogenSignificance[moduleGenes4, 1]),
                   xlab = paste("Module Membership in", module4, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes4, column4]),
                   abs(geneNitrogenSignificance[moduleGenes4, 1]),
                   xlab = paste("Module Membership in", module4, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module4,
                   abline=TRUE)

#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))   


####black and blue
verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
                   abs(geneSulfideSignificance[moduleGenes5, 1]),
                   xlab = paste("Module Membership in", module5, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
                   abs(geneOxygenSignificance[moduleGenes5, 1]),
                   xlab = paste("Module Membership in", module5, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
                   abs(geneHydrogenSignificance[moduleGenes5, 1]),
                   xlab = paste("Module Membership in", module5, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
                   abs(geneNitrogenSignificance[moduleGenes5, 1]),
                   xlab = paste("Module Membership in", module5, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
                  abline=TRUE)
#blue
verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
                   abs(geneSulfideSignificance[moduleGenes6, 1]),
                   xlab = paste("Module Membership in", module6, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
                   abs(geneOxygenSignificance[moduleGenes6, 1]),
                   xlab = paste("Module Membership in", module6, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
                   abs(geneHydrogenSignificance[moduleGenes6, 1]),
                   xlab = paste("Module Membership in", module6, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes6, column6]),
                   abs(geneNitrogenSignificance[moduleGenes6, 1]),
                   xlab = paste("Module Membership in", module6, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module6,
                   abline=TRUE)

#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))   

# darkcyan and green (7 and 8)
#darkcyan
verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
                   abs(geneSulfideSignificance[moduleGenes7, 1]),
                   xlab = paste("Module Membership in", module7, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
                   abs(geneOxygenSignificance[moduleGenes7, 1]),
                   xlab = paste("Module Membership in", module7, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
                   abs(geneHydrogenSignificance[moduleGenes7, 1]),
                   xlab = paste("Module Membership in", module7, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
                   abs(geneNitrogenSignificance[moduleGenes7, 1]),
                   xlab = paste("Module Membership in", module7, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
                  abline=TRUE)
#green
verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
                   abs(geneSulfideSignificance[moduleGenes8, 1]),
                   xlab = paste("Module Membership in", module8, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
                   abs(geneOxygenSignificance[moduleGenes8, 1]),
                   xlab = paste("Module Membership in", module8, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
                   abs(geneHydrogenSignificance[moduleGenes8, 1]),
                   xlab = paste("Module Membership in", module8, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes8, column8]),
                   abs(geneNitrogenSignificance[moduleGenes8, 1]),
                   xlab = paste("Module Membership in", module8, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module8,
                   abline=TRUE)

#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))   


# greenyellow and grey60
#greenyellow
verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
                   abs(geneSulfideSignificance[moduleGenes9, 1]),
                   xlab = paste("Module Membership in", module9, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
                   abs(geneOxygenSignificance[moduleGenes9, 1]),
                   xlab = paste("Module Membership in", module9, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
                   abs(geneHydrogenSignificance[moduleGenes9, 1]),
                   xlab = paste("Module Membership in", module9, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes9, column9]),
                   abs(geneNitrogenSignificance[moduleGenes9, 1]),
                   xlab = paste("Module Membership in", module9, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module9,
                  abline=TRUE)
#grey60
verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
                   abs(geneSulfideSignificance[moduleGenes10, 1]),
                   xlab = paste("Module Membership in", module10, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
                   abs(geneOxygenSignificance[moduleGenes10, 1]),
                   xlab = paste("Module Membership in", module10, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
                   abs(geneHydrogenSignificance[moduleGenes10, 1]),
                   xlab = paste("Module Membership in", module10, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes10, column10]),
                   abs(geneNitrogenSignificance[moduleGenes10, 1]),
                   xlab = paste("Module Membership in", module10, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module10,
                   abline=TRUE)

#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))   

#lightyellow and magenta (11,12)
#lightyellow
verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
                   abs(geneSulfideSignificance[moduleGenes11, 1]),
                   xlab = paste("Module Membership in", module11, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
                   abs(geneOxygenSignificance[moduleGenes11, 1]),
                   xlab = paste("Module Membership in", module11, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
                   abs(geneHydrogenSignificance[moduleGenes11, 1]),
                   xlab = paste("Module Membership in", module11, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes11, column11]),
                   abs(geneNitrogenSignificance[moduleGenes11, 1]),
                   xlab = paste("Module Membership in", module11, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module11,
                  abline=TRUE)
#magenta
verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
                   abs(geneSulfideSignificance[moduleGenes12, 1]),
                   xlab = paste("Module Membership in", module12, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
                   abs(geneOxygenSignificance[moduleGenes12, 1]),
                   xlab = paste("Module Membership in", module12, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
                   abs(geneHydrogenSignificance[moduleGenes12, 1]),
                   xlab = paste("Module Membership in", module12, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes12, column12]),
                   abs(geneNitrogenSignificance[moduleGenes12, 1]),
                   xlab = paste("Module Membership in", module12, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module12,
                   abline=TRUE)

str(geneSulfideSignificance)
## 'data.frame':    2500 obs. of  1 variable:
##  $ Sul.Sulfide: num  0.1156 0.2704 0.3413 -0.1966 -0.0622 ...
what <- geneSulfideSignificance
what$locusID <- rownames(what)
what <- what[,c(2,1)]
# Writing the dataframe to a CSV file so I can use this in cytoscape for a layer
##write.csv(what, file = "geneSulfideSignificance.csv", row.names = FALSE)
#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))   

#purple and salmon (13,14)
#purple
verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
                   abs(geneSulfideSignificance[moduleGenes13, 1]),
                   xlab = paste("Module Membership in", module13, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
                   abs(geneOxygenSignificance[moduleGenes13, 1]),
                   xlab = paste("Module Membership in", module13, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
                   abs(geneHydrogenSignificance[moduleGenes13, 1]),
                   xlab = paste("Module Membership in", module13, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes13, column13]),
                   abs(geneNitrogenSignificance[moduleGenes13, 1]),
                   xlab = paste("Module Membership in", module13, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module13,
                  abline=TRUE)
#salmon
verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
                   abs(geneSulfideSignificance[moduleGenes14, 1]),
                   xlab = paste("Module Membership in", module14, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
                   abs(geneOxygenSignificance[moduleGenes14, 1]),
                   xlab = paste("Module Membership in", module14, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
                   abs(geneHydrogenSignificance[moduleGenes14, 1]),
                   xlab = paste("Module Membership in", module14, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
                   abs(geneNitrogenSignificance[moduleGenes14, 1]),
                   xlab = paste("Module Membership in", module14, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
                   abline=TRUE)

#sizeGrWindow(7, 7);# uncomment when running interactively (not rendering in rmarkdown)
par(mfrow = c(2,4))
par(mar=c(5,4,2,2))   


#grey
verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
                   abs(geneSulfideSignificance[moduleGenes15, 1]),
                   xlab = paste("Module Membership in", module15, "module"),
                   ylab = "Sulfide",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
                   abs(geneOxygenSignificance[moduleGenes15, 1]),
                   xlab = paste("Module Membership in", module15, "module"),
                   ylab = "Oxygen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
                   abs(geneHydrogenSignificance[moduleGenes15, 1]),
                   xlab = paste("Module Membership in", module15, "module"),
                   ylab = "Hydrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes15, column15]),
                   abs(geneNitrogenSignificance[moduleGenes15, 1]),
                   xlab = paste("Module Membership in", module15, "module"),
                   ylab = "Nitrogen",
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module15,
                   abline=TRUE)

######now I am just making the plots that will go next to the gene significance barplots that I made in the previous section, I will do the two most significant modules for each treatment condition

### for sulfide that is teal and gold


#sizeGrWindow(3,5);
par(mfrow = c(1,2))
par(mar=c(4,3,4,3)) 

verboseScatterplot(abs(geneModuleMembership[moduleGenes7, column7]),
                   abs(geneSulfideSignificance[moduleGenes7, 1]),
                   xlab = paste("Module Membership in teal module"),
                   ylab = "Sulfide",
                   pch = 16,
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module7,
                   abline=TRUE)
verboseScatterplot(abs(geneModuleMembership[moduleGenes3, column3]),
                   abs(geneSulfideSignificance[moduleGenes3, 1]),
                   xlab = paste("Module Membership in gold module"),
                   ylab = "Sulfide",
                   pch = 16,
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module3,
                   abline=TRUE)

#sizeGrWindow(3,5);
par(mfrow = c(1,2))
par(mar=c(4,3,4,3)) 

#### for oxygen in black and purple modules


verboseScatterplot(abs(geneModuleMembership[moduleGenes14, column14]),
                   abs(geneOxygenSignificance[moduleGenes14, 1]),
                   xlab = paste("Module Membership in purple"), 
                   ylab = "Oxygen",
                   pch = 16,
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module14,
                   abline = TRUE, xaxt = "n") # Suppress x-axis



verboseScatterplot(abs(geneModuleMembership[moduleGenes5, column5]),
                   abs(geneOxygenSignificance[moduleGenes5, 1]),
                   xlab = paste("Module Membership in black"),
                   ylab = "oxygen",
                   pch = 16,
                   cex.main = 1.0, cex.lab = 1.0, cex.axis = 1.0, col = module5,
                   abline=TRUE)

genes of significance for sulfide limiting response (a version of “hub” genes)

####genes of significance in teal module (darkcyan) under Sulfide 
#geneSulfideSignificance

FilterGenes= abs(geneSulfideSignificance)> .2 & abs(geneModuleMembership$MMdarkcyan)>.8 & SulPvalue <= 0.05
table(FilterGenes)
## FilterGenes
## FALSE  TRUE 
##  2450    50
head(FilterGenes)
##                    Sul.Sulfide
## gene-L0Y14_RS04070       FALSE
## gene-L0Y14_RS09775       FALSE
## gene-L0Y14_RS01990       FALSE
## gene-L0Y14_RS09950       FALSE
## gene-L0Y14_RS09955       FALSE
## gene-L0Y14_RS12365       FALSE
Sulfideteal_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
Sulfideteal_filtered <- gsub("e.L", "e-L", Sulfideteal_filtered)

gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
gene_names <- gene_names[,2:3]
names(gene_names)[1] <- "locusID"

Sulfideteal_filtered <- as.data.frame(Sulfideteal_filtered)
names(Sulfideteal_filtered)[1] <- "locusID"
str(Sulfideteal_filtered)
## 'data.frame':    50 obs. of  1 variable:
##  $ locusID: chr  "gene-L0Y14_RS07425" "gene-L0Y14_RS07440" "gene-L0Y14_RS07430" "gene-L0Y14_RS07435" ...
Sulfideteal_filtered <- left_join(Sulfideteal_filtered,gene_names)
## Joining with `by = join_by(locusID)`
dim(Sulfideteal_filtered)
## [1] 50  2
names(Sulfideteal_filtered)
## [1] "locusID" "gene"
list <- 1:50
condition <- rep("Sulfide",length(list))
moduleFil <- rep("teal",length(list))
a <- cbind(Sulfideteal_filtered,condition,moduleFil)

sul.teal <- a


####genes of significance in gold module under Sulfide
#geneSulfideSignificance
FilterGenes= abs(geneSulfideSignificance)> .2 & abs(geneModuleMembership$MMdarkgoldenrod1)>.8 & SulPvalue <= 0.05
table(FilterGenes)
## FilterGenes
## FALSE  TRUE 
##  2478    22
Sulfidegold_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
Sulfidegold_filtered <- gsub("e.L", "e-L", Sulfidegold_filtered)

gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
gene_names <- gene_names[,2:3]
names(gene_names)[1] <- "locusID"

Sulfidegold_filtered <- as.data.frame(Sulfidegold_filtered)
names(Sulfidegold_filtered)[1] <- "locusID"
str(Sulfidegold_filtered)
## 'data.frame':    22 obs. of  1 variable:
##  $ locusID: chr  "gene-L0Y14_RS01985" "gene-L0Y14_RS07695" "gene-L0Y14_RS00080" "gene-L0Y14_RS12030" ...
Sulfidegold_filtered <- left_join(Sulfidegold_filtered,gene_names)
## Joining with `by = join_by(locusID)`
dim(Sulfidegold_filtered)
## [1] 22  2
names(Sulfidegold_filtered)
## [1] "locusID" "gene"
list <- 1:22
condition <- rep("Sulfide",length(list))
moduleFil <- rep("darkgoldenrod1",length(list))
d <- cbind(Sulfidegold_filtered,condition,moduleFil)
sulfide_hub_gold <- d

genes of significance for sulfide limiting response (a version of “hub” genes)

####genes of significance in black module under Oxygen 
#geneOxygenSignificance
FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMblack)>.8 & OxyPvalue <= 0.05
table(FilterGenes)
## FilterGenes
## FALSE  TRUE 
##  2467    33
head(FilterGenes)
##                    Oxy.Oxygen
## gene-L0Y14_RS04070      FALSE
## gene-L0Y14_RS09775      FALSE
## gene-L0Y14_RS01990      FALSE
## gene-L0Y14_RS09950      FALSE
## gene-L0Y14_RS09955      FALSE
## gene-L0Y14_RS12365      FALSE
Oxygenblack_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
Oxygenblack_filtered <- gsub("e.L", "e-L", Oxygenblack_filtered)

gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
gene_names <- gene_names[,2:3]
names(gene_names)[1] <- "locusID"

Oxygenblack_filtered <- as.data.frame(Oxygenblack_filtered)
names(Oxygenblack_filtered)[1] <- "locusID"
str(Oxygenblack_filtered)
## 'data.frame':    33 obs. of  1 variable:
##  $ locusID: chr  "gene-L0Y14_RS14085" "gene-L0Y14_RS12670" "gene-L0Y14_RS08400" "gene-L0Y14_RS05880" ...
Oxygenblack_filtered <- left_join(Oxygenblack_filtered,gene_names)
## Joining with `by = join_by(locusID)`
dim(Oxygenblack_filtered)
## [1] 33  2
names(Oxygenblack_filtered)
## [1] "locusID" "gene"
list <- 1:33
condition <- rep("Oxygen",length(list))
moduleFil <- rep("black",length(list))
oxy_b <- cbind(Oxygenblack_filtered,condition,moduleFil)


#####



names(geneModuleMembership)
##  [1] "MMmidnightblue"   "MMdarkorchid4"    "MMyellow"         "MMgreenyellow"   
##  [5] "MMblack"          "MMdarkgreen"      "MMdeeppink3"      "MMpink"          
##  [9] "MMgrey60"         "MMbrown"          "MMplum4"          "MMskyblue"       
## [13] "MMdarkcyan"       "MMdarkgoldenrod1" "MMgrey"
names(geneModuleMembership)
##  [1] "MMmidnightblue"   "MMdarkorchid4"    "MMyellow"         "MMgreenyellow"   
##  [5] "MMblack"          "MMdarkgreen"      "MMdeeppink3"      "MMpink"          
##  [9] "MMgrey60"         "MMbrown"          "MMplum4"          "MMskyblue"       
## [13] "MMdarkcyan"       "MMdarkgoldenrod1" "MMgrey"
####genes of significance in purple (darkorchid) module under Oxygen
#geneOxygenSignificance
names(geneModuleMembership)
##  [1] "MMmidnightblue"   "MMdarkorchid4"    "MMyellow"         "MMgreenyellow"   
##  [5] "MMblack"          "MMdarkgreen"      "MMdeeppink3"      "MMpink"          
##  [9] "MMgrey60"         "MMbrown"          "MMplum4"          "MMskyblue"       
## [13] "MMdarkcyan"       "MMdarkgoldenrod1" "MMgrey"
FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMdarkorchid4)>.8 & OxyPvalue <= 0.05
table(FilterGenes)
## FilterGenes
## FALSE  TRUE 
##  2476    24
Oxygenpurple_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
Oxygenpurple_filtered <- gsub("e.L", "e-L", Oxygenpurple_filtered)

gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
gene_names <- gene_names[,2:3]
names(gene_names)[1] <- "locusID"

Oxygenpurple_filtered <- as.data.frame(Oxygenpurple_filtered)
names(Oxygenpurple_filtered)[1] <- "locusID"
str(Oxygenpurple_filtered)
## 'data.frame':    24 obs. of  1 variable:
##  $ locusID: chr  "gene-L0Y14_RS09775" "gene-L0Y14_RS16350" "gene-L0Y14_RS12700" "gene-L0Y14_RS07425" ...
Oxygenpurple_filtered <- left_join(Oxygenpurple_filtered,gene_names)
## Joining with `by = join_by(locusID)`
dim(Oxygenpurple_filtered)
## [1] 24  2
names(Oxygenpurple_filtered)
## [1] "locusID" "gene"
list <- 1:24
condition <- rep("Oxygen",length(list))
moduleFil <- rep("darkorchid4",length(list))
oxy_purple <- cbind(Oxygenpurple_filtered,condition,moduleFil)

#####



names(geneModuleMembership)
##  [1] "MMmidnightblue"   "MMdarkorchid4"    "MMyellow"         "MMgreenyellow"   
##  [5] "MMblack"          "MMdarkgreen"      "MMdeeppink3"      "MMpink"          
##  [9] "MMgrey60"         "MMbrown"          "MMplum4"          "MMskyblue"       
## [13] "MMdarkcyan"       "MMdarkgoldenrod1" "MMgrey"
####genes of significance in purple module under Oxygen
FilterGenes= abs(geneOxygenSignificance)> .2 & abs(geneModuleMembership$MMdarkorchid4)>.8
table(FilterGenes)
## FilterGenes
## FALSE  TRUE 
##  2476    24
Oxygenpurple_filtered <- dimnames(data.frame(WGCNA_matrix))[[2]][FilterGenes]
Oxygenpurple_filtered <- gsub("e.L", "e-L", Oxygenpurple_filtered)

gene_names <- read.csv("gene_names_for_wgcna_matrix_files.csv",header=TRUE)
gene_names <- gene_names[,2:3]
names(gene_names)[1] <- "locusID"

Oxygenpurple_filtered <- as.data.frame(Oxygenpurple_filtered)
names(Oxygenpurple_filtered)[1] <- "locusID"
str(Oxygenpurple_filtered)
## 'data.frame':    24 obs. of  1 variable:
##  $ locusID: chr  "gene-L0Y14_RS09775" "gene-L0Y14_RS16350" "gene-L0Y14_RS12700" "gene-L0Y14_RS07425" ...
Oxygenpurple_filtered <- left_join(Oxygenpurple_filtered,gene_names)
## Joining with `by = join_by(locusID)`
dim(Oxygenpurple_filtered)
## [1] 24  2
names(Oxygenpurple_filtered)
## [1] "locusID" "gene"
list <- 1:24
condition <- rep("Oxygen",length(list))
moduleFil <- rep("purple",length(list))
oxy_purple <- cbind(Oxygenpurple_filtered,condition,moduleFil)

build file for cytoscape

lnames = load(file = "wgcna_manual_network20220910.RData")
lnames =load(file="count_matrix_condition_info_dataInput.RData")

TOM = TOMsimilarityFromExpr(WGCNA_matrix, power = 8,
                            networkType = "signed hybrid",
                            TOMType = "signed")
## TOM calculation: adjacency..
## ..will not use multithreading.
##  Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
moduleColors = mergedColors

###below we will just use all genes, but module=c("list of color modules you could filter by","eg.black","red")
#module= moduleColors

#module2=c("brown","cyan","midnightblue","blue","magenta","turquoise","black","salmon","pink","green","turqoise")
          

inModule = is.finite(match(moduleColors, moduleColors))

#inModule2 = is.finite(match(moduleColors, module2))
#table(inModule2)

genes = colnames(WGCNA_matrix)
modGenes = genes[inModule]
#modGenes2=genes[inModule2]

modTOM = TOM[inModule, inModule]
#modTOM2 = TOM[inModule2,inModule2]
dimnames(modTOM) = list(modGenes, modGenes)
#dimnames(modTOM2) = list(modGenes2, modGenes2)


genes_of_note <- read.csv("enzymes_of_note.csv")
names(genes_of_note)
## [1] "broad_function"     "function."          "gene"              
## [4] "locus_ID_andre_new" "X"
genes_of_note <- genes_of_note[,1:4]
names(genes_of_note)[4] <- "modGenes"
cool <- genes_of_note[,c(4,3,2,1)]

write.csv(cool,"cool_genes.csv")


modGenes3 <- as.data.frame(modGenes)
names(modGenes3)[1] <- "modGenes"



#modGenes4 <- as.data.frame(modGenes2)
#names(modGenes4)[1] <- "modGenes"
#str(modGenes4)



gene_annos <- right_join(modGenes3,cool)
## Joining with `by = join_by(modGenes)`
#gene_annos2 <- right_join(modGenes4,cool)


names(gene_annos)
## [1] "modGenes"       "gene"           "function."      "broad_function"
cool_gene_list <- gene_annos[,1]
cool_gene_list <- as.data.frame(cool_gene_list)
names(cool_gene_list)[1] <- "modGenes"


#names(gene_annos2)
#cool_gene_list2 <- gene_annos2[,1]
#cool_gene_list2 <- as.data.frame(cool_gene_list2)
#names(cool_gene_list2)[1] <- "modGenes"

modcool <- inner_join(modGenes3,cool_gene_list)
## Joining with `by = join_by(modGenes)`
#modcool2 <- inner_join(modGenes4,cool_gene_list2)


uncool <- modGenes3[ ! modGenes3$modGenes %in% modcool$modGenes,]
#uncool2 <- modGenes4[ ! modGenes4$modGenes %in% modcool2$modGenes,]
uncool <- as.data.frame(uncool)
#uncool2 <- as.data.frame(uncool2)
names(uncool)[1] <- "modGenes"
#names(uncool2)[1] <- "modGenes"

from_gff <- read.delim("GCF_023733635.1_ASM2373363v1_feature_table.txt")
from_gff$locus_tag = paste0('gene-', from_gff$locus_tag)
names(from_gff)[17] <- "modGenes"
names(from_gff)
##  [1] "X..feature"              "class"                  
##  [3] "assembly"                "assembly_unit"          
##  [5] "seq_type"                "chromosome"             
##  [7] "genomic_accession"       "start"                  
##  [9] "end"                     "strand"                 
## [11] "product_accession"       "non.redundant_refseq"   
## [13] "related_accession"       "name"                   
## [15] "symbol"                  "GeneID"                 
## [17] "modGenes"                "feature_interval_length"
## [19] "product_length"          "attributes"
names(from_gff)[1] <- "feature"
gff2 <- subset(from_gff,feature=="CDS")
names(gff2)
##  [1] "feature"                 "class"                  
##  [3] "assembly"                "assembly_unit"          
##  [5] "seq_type"                "chromosome"             
##  [7] "genomic_accession"       "start"                  
##  [9] "end"                     "strand"                 
## [11] "product_accession"       "non.redundant_refseq"   
## [13] "related_accession"       "name"                   
## [15] "symbol"                  "GeneID"                 
## [17] "modGenes"                "feature_interval_length"
## [19] "product_length"          "attributes"
gff3 <- gff2[,c(17,14)]
gff4 <- gff2[,c(17,14,11)]






coolg <- left_join(modcool,cool)
## Joining with `by = join_by(modGenes)`
coolg <- left_join(coolg,gff4)
## Joining with `by = join_by(modGenes)`
coolg <- coolg[,c(1,2,6)]

#coolg2 <- left_join(modcool2,cool)
#coolg2 <- left_join(coolg2,gff4)
#coolg2 <- coolg2[,c(1,2,6)]

str(gff4)
## 'data.frame':    3213 obs. of  3 variables:
##  $ modGenes         : chr  "gene-L0Y14_RS00005" "gene-L0Y14_RS00010" "gene-L0Y14_RS00015" "gene-L0Y14_RS00020" ...
##  $ name             : chr  "chromosomal replication initiator protein DnaA" "DNA polymerase III subunit beta" "IS3 family transposase" "hypothetical protein" ...
##  $ product_accession: chr  "WP_005958812.1" "WP_006474992.1" "" "WP_138921860.1" ...
#str(uncool2)
#names(uncool)[1] <- "modGenes"
uncoolg <- left_join(uncool,gff3)
## Joining with `by = join_by(modGenes)`
#uncoolg2 <- left_join(uncool2,gff4)
names(uncoolg)[2] <- "gene"
coolg <- coolg[,1:2]
all_genes6 <- rbind(coolg,uncoolg)


#names(uncool2)[1] <- "modGenes"
#uncoolg2 <- left_join(uncool2,gff3)
#uncoolg3 <- left_join(uncool2,gff4)
#names(uncoolg3)[2] <- "gene"
#names(coolg2)
all_genes <- rbind(coolg,uncoolg)

write.csv(all_genes6,"gene_names_for_wgcna_matrix_files_sig_module.csv")


vec1 <- all_genes6[,1]

vec2 <- modGenes
head(vec1)
## [1] "gene-L0Y14_RS04070" "gene-L0Y14_RS01990" "gene-L0Y14_RS12365"
## [4] "gene-L0Y14_RS06850" "gene-L0Y14_RS02365" "gene-L0Y14_RS07425"
reorder_idx <- match(vec2,vec1)

all_genes <- vec1[reorder_idx]
all_genes3 <- as.data.frame(all_genes)
names(all_genes3)[1] <- "modGenes"

genes2 <- left_join(all_genes3,all_genes6)
## Joining with `by = join_by(modGenes)`
genes777 <- genes2[,2]
genes888 <- genes2[,2]



dimnames(modTOM) = list(all_genes, all_genes)
#str(modGenes2)
#moduleColors[inModule]
#head(modTOM2)
#str(modTOM2)
head(all_genes)
## [1] "gene-L0Y14_RS04070" "gene-L0Y14_RS09775" "gene-L0Y14_RS01990"
## [4] "gene-L0Y14_RS09950" "gene-L0Y14_RS09955" "gene-L0Y14_RS12365"
#str(all_genes2)

set.seed(123)
###all_genes2 needs to be a list not dataframe!




cyt_nofilter=cyt = exportNetworkToCytoscape(modTOM,
                               edgeFile = "CytoscapeEdgeFile20230201.txt",
                               nodeFile = "CytoscapeNodeFile20230201.txt",
                               weighted = TRUE,
                               threshold = 0.0,
                               nodeNames = all_genes,
                               altNodeNames = genes777,
                               nodeAttr = moduleColors[inModule])






cyt_all = exportNetworkToCytoscape(modTOM,
                               edgeFile = "CytoscapeEdgeFile20231201_0.05.txt",
                               nodeFile = "CytoscapeNodeFile20231201_0.05.txt",
                               weighted = TRUE,
                               threshold = 0.05,
                               nodeNames = all_genes,
                               altNodeNames = genes888,
                               nodeAttr = moduleColors[inModule])



file_path <- "CytoscapeNodeFile20231201_0.05.txt"  

lines <- readLines(file_path)
cleaned_data <- list()
column_names <- NULL
# Loop through the lines and process data as needed
for (line in lines) {
  # Split the line using tabs as delimiters
  elements <- unlist(strsplit(line, "\t"))
  
  if (is.null(column_names)) {
    # If column names are not set, use the first line as column names
    column_names <- elements
  } else {
    # Process elements as needed (e.g., remove extra data, format, etc.)
    # Append cleaned data to the list
    cleaned_data <- append(cleaned_data, list(elements))
  }
}

final_data <- as.data.frame(do.call(rbind, cleaned_data), stringsAsFactors = FALSE)
colnames(final_data) <- column_names

table(final_data[,3])
## 
##          black          brown       darkcyan darkgoldenrod1      darkgreen 
##             98            196            214             79            130 
##    darkorchid4      deeppink3    greenyellow           grey         grey60 
##             71            199            136             32            155 
##   midnightblue           pink          plum4        skyblue         yellow 
##             59            337             92            106             41
# Replace all instances of color names to match those used in paper in the entire data frame
final_data <- lapply(final_data, function(x) ifelse(x == "darkorchid4", "purple", x))
final_data <- as.data.frame(final_data)

final_data <- lapply(final_data, function(x) ifelse(x == "plum4", "light_purple", x))
final_data <- as.data.frame(final_data)

final_data <- lapply(final_data, function(x) ifelse(x == "darkcyan", "teal", x))
final_data <- as.data.frame(final_data)

final_data <- lapply(final_data, function(x) ifelse(x == "deeppink3", "cherry", x))
final_data <- as.data.frame(final_data)

final_data <- lapply(final_data, function(x) ifelse(x == "yellow", "light_yellow", x))
final_data <- as.data.frame(final_data)


final_data <- lapply(final_data, function(x) ifelse(x == "darkgoldenrod1", "gold", x))
final_data <- as.data.frame(final_data)

final_data <- lapply(final_data, function(x) ifelse(x == "darkgreen","green", x))
final_data <- as.data.frame(final_data)



#write.table(final_data, file = "output_file.txt", sep = "\t", quote = FALSE, row.names = FALSE)

GSvsMS figures

###mean_significance_of_module_for_conditionSulfide############################################################



par(mfrow = c(2,1))

colorh1 <- mergedColors

GS1=as.numeric(cor(biconditions$Sulfide,WGCNA_matrix, use="p"))
head(GS1)
## [1]  0.11559790  0.27041835  0.34126255 -0.19662226 -0.06219137 -0.23047838
GeneSignificance=abs(GS1)
head(GeneSignificance)
## [1] 0.11559790 0.27041835 0.34126255 0.19662226 0.06219137 0.23047838
str(colorh1)
##  chr [1:2500] "plum4" "darkorchid4" "midnightblue" "grey60" "grey60" ...
ModuleSignificance=tapply(GeneSignificance, colorh1, mean, na.rm=T)
str(ModuleSignificance)
##  num [1:15(1d)] 0.138 0.193 0.309 0.368 0.133 ...
##  - attr(*, "dimnames")=List of 1
##   ..$ : chr [1:15] "black" "brown" "darkcyan" "darkgoldenrod1" ...
geneSulfideSignificance=as.data.frame(cor(WGCNA_matrix, Sulfide, use = "p"))
SulPvalue = as.data.frame(corPvalueStudent(as.matrix(geneSulfideSignificance), nSamples))
names(geneSulfideSignificance) = paste("Sul.", names(Sulfide), sep="");
names(SulPvalue) = paste("p.Sul.", names(Sulfide), sep="");

x_limits <- range(
  abs(geneModuleMembership[moduleGenes7, column7]),
  abs(geneModuleMembership[moduleGenes3, column3])
)

head(geneModuleMembership)
##                    MMmidnightblue MMdarkorchid4    MMyellow MMgreenyellow
## gene-L0Y14_RS04070     -0.2978002    -0.4742295 -0.06228793   -0.12283434
## gene-L0Y14_RS09775      0.6475847     0.8479211  0.16301606    0.38986765
## gene-L0Y14_RS01990      0.7683016     0.2909617 -0.21720940    0.03127418
## gene-L0Y14_RS09950      0.6250107     0.4452151  0.21104415    0.01837192
## gene-L0Y14_RS09955      0.5993774     0.4088890 -0.08634607   -0.20976909
## gene-L0Y14_RS12365      0.3632049     0.1311813 -0.06645301   -0.38765511
##                        MMblack MMdarkgreen MMdeeppink3      MMpink   MMgrey60
## gene-L0Y14_RS04070 -0.16971124 -0.05481641 -0.01719957 -0.01598375 -0.6563363
## gene-L0Y14_RS09775  0.53953441  0.44692677  0.41381637  0.59720934  0.1830737
## gene-L0Y14_RS01990  0.06471391 -0.09289025  0.08717597  0.11976742  0.2550905
## gene-L0Y14_RS09950  0.43427234  0.22863891  0.17923724  0.10431595  0.4740972
## gene-L0Y14_RS09955  0.23088121 -0.01544034  0.08834925 -0.04765631  0.5107174
## gene-L0Y14_RS12365 -0.00202547 -0.12064102 -0.25455458 -0.31551755  0.7070535
##                       MMbrown    MMplum4  MMskyblue  MMdarkcyan
## gene-L0Y14_RS04070 -0.1384809  0.6483345  0.2465064  0.25589117
## gene-L0Y14_RS09775 -0.2948900 -0.5986951 -0.2859013 -0.84047590
## gene-L0Y14_RS01990  0.2365179 -0.1068451 -0.5795192 -0.47056380
## gene-L0Y14_RS09950  0.2737269 -0.2366527 -0.6256319 -0.43994105
## gene-L0Y14_RS09955  0.4747395 -0.3438988 -0.6771603 -0.39111707
## gene-L0Y14_RS12365  0.4675125 -0.2198442 -0.5555037 -0.09885019
##                    MMdarkgoldenrod1      MMgrey
## gene-L0Y14_RS04070       -0.1725039 -0.34377603
## gene-L0Y14_RS09775       -0.4423334  0.33425164
## gene-L0Y14_RS01990       -0.5786492 -0.10188510
## gene-L0Y14_RS09950       -0.3808909  0.07218213
## gene-L0Y14_RS09955       -0.3278772  0.27804497
## gene-L0Y14_RS12365       -0.2081241  0.06962918
# First Plot (plotModuleSignificance)
plotModuleSignificance(GeneSignificance, colorh1,
                       legend.text = TRUE,
                       main = "Average gene significance in modules with sulfide variable",
                       cex.axis = 1)


help(plotModuleSignificance)

table(colorh1)
## colorh1
##          black          brown       darkcyan darkgoldenrod1      darkgreen 
##            124            233            239             99            143 
##    darkorchid4      deeppink3    greenyellow           grey         grey60 
##            100            214            160            242            203 
##   midnightblue           pink          plum4        skyblue         yellow 
##             83            385            109            120             46
par(mar = c(2, 3, 2, 3))
par(mfrow = c(2,1))

###mean_significance_of_module_for_condition_Oxygen#########################################################
sizeGrWindow(8,7)
par(mar = c(2, 3, 2, 3))
par(mfrow = c(2,1))



GS1=as.numeric(cor(biconditions$Oxygen,WGCNA_matrix, use="p"))
GeneSignificance=abs(GS1)
ModuleSignificance=tapply(GeneSignificance, colorh1, mean, na.rm=T)

#sizeGrWindow(8,7)
#par(mfrow = c(1,3))
plotModuleSignificance(GeneSignificance,colorh1,
                       legend.text=TRUE,
                       main = "Gene significance in oxygen treatments across modules",
                       cex.axis=1)
help(plotModuleSignificance)

# Plot for the purple module with suppressed x-axis

#y_limits <- range(
 # abs(geneOxygenSignificance[moduleGenes14, 1]),
  #abs(geneOxygenSignificance[moduleGenes5, 1])
#)

x_limits <- range(abs(geneModuleMembership[moduleGenes14, column14]),
                  abs(geneModuleMembership[moduleGenes5, column5]))

upset r plot nitrogen metabolism

nitro <- read.csv("Merged Network_2 default node.csv")
#names(nitro)

nirS <- subset(nitro,nirS=="1")

#nitro$shared.name

#(nitro$neighbors666)

rtca6 <-nitro[,c(133,69)] 
  
  
nitro1 <- nitro[c(1:55,57:894),c(133,69,106,3,28,38,39,65,66,108,109,120,121,125)]
#nitro1$shared.name
#names(nitro1)

names(nitro1)[3] <- "cbb"
nitro1$cbb <-factor(ifelse(nitro1$cbb=="cbb",1,0))
table(nitro1$cbb)
## 
##   0   1 
## 373 520
names(nitro1)[2] <- "rtca"
nitro1$rtca <-factor(ifelse(nitro1$rtca=="rtca",1,0))
table(nitro1$rtca)
## 
##   0   1 
## 745 148
indx <- sapply(nitro1, is.factor)
nitro1[indx] <- lapply(nitro1[indx], function(x) as.numeric(as.character(x)))

str(nitro1)
## 'data.frame':    893 obs. of  14 variables:
##  $ shared.name     : chr  "gene-L0Y14_RS16060" "gene-L0Y14_RS01710" "gene-L0Y14_RS01760" "gene-L0Y14_RS01755" ...
##  $ rtca            : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cbb             : num  1 1 1 1 1 1 1 1 1 0 ...
##  $ amtB            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ gogat_glnA      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hoa1            : int  1 NA NA 1 NA 1 NA NA 1 NA ...
##  $ hoa2            : int  NA NA NA NA NA 1 1 NA NA NA ...
##  $ nap             : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ narGHI          : int  NA NA NA NA NA NA NA NA 1 NA ...
##  $ nirBD_nasA_nark1: int  1 NA NA 1 1 NA NA NA 1 NA ...
##  $ nirS            : int  NA NA NA NA NA NA 1 NA NA NA ...
##  $ norCB           : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ nosZ_cluster    : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ otr             : int  NA NA NA NA NA 1 1 NA 1 NA ...
nitro1[is.na(nitro1)] = 0
indx <- sapply(nitro1, is.factor)
nitro1[indx] <- lapply(nitro1[indx], function(x) as.numeric(as.character(x)))

upset(nitro1,sets = c("rtca","cbb" ,"amtB","gogat_glnA","hoa1","hoa2","otr","nirBD_nasA_nark1","nap","narGHI","nirS","norCB","nosZ_cluster"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE, order)

upset r plot amino acid metabolism metabolism

letslook <- read.csv("aa_cbb_rtca_merged_with_fixed_rtca_cbb default  node.csv")
#names(letslook)
letslook <- letslook[1:1052,c(131,60:109)]
rtca1 <- subset(letslook, neighbors1000 =="rtca")
#rtca1$shared.name
#names(letslook)
table(letslook$neighbors1000)
## 
##      rtca 
##  904  148
names(letslook)[5] <- "rtca"
letslook$rtca <-factor(ifelse(letslook$rtca=="rtca",1,0))
table(letslook$rtca)
## 
##   0   1 
## 904 148
#names(letslook)
table(letslook$neighbhors184)
## 
##                  lysine_synthesis 
##              836              216
which(colnames(letslook)=="neighbhors184")
## [1] 3
names(letslook)[which(colnames(letslook)=="neighbhors184")] <- "lysine_synthesis"
letslook$lysine_synthesis <-factor(ifelse(letslook$lysine_synthesis=="lysine_synthesis",1,0))
table(letslook$lysine_synthesis)
## 
##   0   1 
## 836 216
#names(letslook)
table(letslook$neighbors666)
## 
##     cbb 
## 532 520
which(colnames(letslook)=="neighbors666")
## [1] 51
names(letslook)[which(colnames(letslook)=="neighbors666")] <- "cbb"
letslook$cbb <-factor(ifelse(letslook$cbb=="cbb",1,0))
table(letslook$cbb)
## 
##   0   1 
## 532 520
#names(letslook)
table(letslook$neighbors185)
## 
##                    arginine_synthesis 
##               1010                 42
names(letslook)[which(colnames(letslook)=="neighbors185")] <- "arginine_synthesis"
letslook$arginine_synthesis <-factor(ifelse(letslook$arginine_synthesis=="arginine_synthesis",1,0))
table(letslook$arginine_synthesis)
## 
##    0    1 
## 1010   42
#names(letslook)
table(letslook$neighbors183)
## 
##                     ornithine_synthesis 
##                 648                 404
names(letslook)[which(colnames(letslook)=="neighbors183")] <- "ornithine_synthesis"
letslook$ornithine_synthesis <-factor(ifelse(letslook$ornithine_synthesis=="ornithine_synthesis",1,0))
table(letslook$ornithine_synthesis)
## 
##   0   1 
## 648 404
#names(letslook)
table(letslook$neighbors182)
## 
##                     citruline_synthesis 
##                 941                 111
names(letslook)[which(colnames(letslook)=="neighbors182")] <- "citruline_synthesis"
letslook$citruline_synthesis <-factor(ifelse(letslook$citruline_synthesis=="citruline_synthesis",1,0))
table(letslook$citruline_synthesis)
## 
##   0   1 
## 941 111
#names(letslook)
table(letslook$neighbors181)
## 
##                  asparagine_synth 
##              756              296
names(letslook)[which(colnames(letslook)=="neighbors181")] <- "asparagine_synth"
letslook$asparagine_synth <-factor(ifelse(letslook$asparagine_synth=="asparagine_synth",1,0))
table(letslook$asparagine_synth)
## 
##   0   1 
## 756 296
#names(letslook)
table(letslook$neighbors186)
## 
##           shikimate 
##       674       378
names(letslook)[which(colnames(letslook)=="neighbors186")] <- "shikimate"
letslook$shikimate <-factor(ifelse(letslook$shikimate=="shikimate",1,0))
table(letslook$shikimate)
## 
##   0   1 
## 674 378
#names(letslook)
table(letslook$neighbors187)
## 
##                      tryptophan_synthesis 
##                  961                   91
names(letslook)[which(colnames(letslook)=="neighbors187")] <- "tryptophan_synthesis"
letslook$tryptophan_synthesis <-factor(ifelse(letslook$tryptophan_synthesis=="tryptophan_synthesis",1,0))
table(letslook$tryptophan_synthesis)
## 
##   0   1 
## 961  91
#names(letslook)
table(letslook$neighbors189)
## 
##                              phenylalanine_tyrosine_synth 
##                         1029                           23
names(letslook)[which(colnames(letslook)=="neighbors189")] <- "phenylalanine_tyrosine_synth"
letslook$phenylalanine_tyrosine_synth <-factor(ifelse(letslook$phenylalanine_tyrosine_synth=="phenylalanine_tyrosine_synth",1,0))
table(letslook$phenylalanine_tyrosine_synth)
## 
##    0    1 
## 1029   23
#names(letslook)
table(letslook$neighbors200)
## 
##                     histidine_synthesis 
##                 836                 216
names(letslook)[which(colnames(letslook)=="neighbors200")] <- "histidine_synthesis"
letslook$histidine_synthesis <-factor(ifelse(letslook$histidine_synthesis=="histidine_synthesis",1,0))
table(letslook$histidine_synthesis)
## 
##   0   1 
## 836 216
#names(letslook)
table(letslook$neighbors202)
## 
##                  valine_synthesis 
##             1003               49
names(letslook)[which(colnames(letslook)=="neighbors202")] <- "valine_synthesis"
letslook$valine_synthesis <-factor(ifelse(letslook$valine_synthesis=="valine_synthesis",1,0))
table(letslook$valine_synthesis)
## 
##    0    1 
## 1003   49
#names(letslook)
table(letslook$neighbors190)
## 
##                              luecine_isoluecine_synthesis 
##                         1002                           50
names(letslook)[which(colnames(letslook)=="neighbors190")] <- "luecine_isoluecine_synthesis"
letslook$luecine_isoluecine_synthesis <-factor(ifelse(letslook$luecine_isoluecine_synthesis=="luecine_isoluecine_synthesis",1,0))
table(letslook$luecine_isoluecine_synthesis)
## 
##    0    1 
## 1002   50
#names(letslook)

indx <- sapply(letslook, is.factor)
letslook[indx] <- lapply(letslook[indx], function(x) as.numeric(as.character(x)))


upset(letslook,sets = c("rtca","cbb" ,"lysine_synthesis","asparagine_synth","citruline_synthesis","ornithine_synthesis","arginine_synthesis","shikimate","tryptophan_synthesis","phenylalanine_tyrosine_synth","luecine_isoluecine_synthesis","histidine_synthesis","valine_synthesis"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)

upset r plot sulfide metabolism

sulfy <- read.csv("cfix_sulfide_merged_binary default  node.csv")
#names(sulfy)

sulfy1 <- sulfy[1:890,c(131:135,127,129,5,22,27,68,107)]

which(colnames(sulfy1)=="sbp")
## [1] 7
#names(sulfy1)
names(sulfy1)[12] <- "cbb"
sulfy1$cbb <-factor(ifelse(sulfy1$cbb=="cbb",1,0))
table(sulfy1$cbb)
## 
##   0   1 
## 370 520
names(sulfy1)[11] <- "rtca"
sulfy1$rtca <-factor(ifelse(sulfy1$rtca=="rtca",1,0))
table(sulfy1$rtca)
## 
##   0   1 
## 741 149
indx <- sapply(sulfy1, is.factor)
sulfy1[indx] <- lapply(sulfy1[indx], function(x) as.numeric(as.character(x)))

#str(sulfy1)
sulfy1[is.na(sulfy1)] = 0
indx <- sapply(sulfy1, is.factor)
sulfy1[indx] <- lapply(sulfy1[indx], function(x) as.numeric(as.character(x)))

#names(sulfy1)
upset(sulfy1,sets = c("rtca","cbb" ,"sqr","fccA","sbp","sox","soe","sreA","dsr_tusA","apr_sat","qmoABhdrCB"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)

upset r hydrogenases, atpases, etc

electron <- read.csv("rtca_cbb_hyd_etc_atpases_merged_binary default  node.csv")
#names(electron)

electro1 <- electron[,c(94,68,70,12,25,38,39,63,84,86,106,107)]

#names(electro1)
names(electro1)[3] <- "cbb"
electro1$cbb <-factor(ifelse(electro1$cbb=="cbb",1,0))
table(electro1$cbb)
## 
##   0   1 
## 485 520
names(electro1)[2] <- "rtca"
electro1$rtca <-factor(ifelse(electro1$rtca=="rtca",1,0))
table(electro1$rtca)
## 
##   0   1 
## 856 149
indx <- sapply(electro1, is.factor)
electro1[indx] <- lapply(electro1[indx], function(x) as.numeric(as.character(x)))

#str(electro1)
electro1[is.na(electro1)] = 0
indx <- sapply(electro1, is.factor)
electro1[indx] <- lapply(electro1[indx], function(x) as.numeric(as.character(x)))

#names(electro1)
upset(electro1,sets = c("rtca","cbb" ,"nuo","pet","cco","mbx","F1atpase","V1atpase","V2atpase","hyd1e","hyd3b"), mb.ratio = c(0.55, 0.45), order.by = "freq",empty.intersections = "off", keep.order = TRUE)

DE expression analysis of significant genes, classified as hubs, in significant modules

de<-read.csv('entotal_202200815_use_this_corrected_locus_tags.csv',header=TRUE,stringsAsFactors=FALSE)

sig<-de[de$adj.P.Val<=0.05,]
sig0<-sig[sig$logFC<=-1,]
sig1<-sig[sig$logFC>=1,]
sig<-rbind(sig0,sig1)


a<-subset(sig,comparison=='LS_vs_HS_noN_HO')
b<-subset(sig,comparison=='LS_vs_HS_N_HO')
c<-subset(sig,comparison=='H2_vs_HS_N_HO')
d<-subset(sig,comparison=='H2_vs_HS_noN_HO')
e<-subset(sig,comparison=='SW_vs_HS_noN_HO')
f<-subset(sig,comparison=='H2_vs_LS_N_HO')
g<-subset(sig,comparison=='H2_vs_LS_noN_HO')
h<-subset(sig,comparison=='SW_vs_LS_noN_HO')

#i<-subset(sig,comparison=='LO_vs_HO_H2_N')no de!!!
j <- subset(sig,comparison=='LO_vs_HO_H2_noN')
k <- subset(sig,comparison=='LO_vs_HO_HS_N')

l <- subset(sig,comparison=='noN_vs_N_H2_HO')
m <- subset(sig,comparison=='noN_vs_N_HS_HO')
n <- subset(sig,comparison=='noN_vs_N_LS_HO')

sulfide<-rbind(a,b,c,d,e,f,g,h)


no_hydrogen_sulfide<-rbind(a,b,e,h)

sulfide_nolowSvsH2 <- rbind(a,b,c,d,e)

oxygen <- rbind(j,k)

nitrogen <- rbind(l,m,n)

my.colors.sulfide<-
scale_color_manual(values=c(
'LS_vs_HS_noN_HO'='#A982AA',
'LS_vs_HS_N_HO'='#8E4162',
'H2_vs_HS_N_HO'='#404E7C',
'H2_vs_HS_noN_HO'='#6C91BF',
'SW_vs_HS_noN_HO'='#EEA58C',
'H2_vs_LS_N_HO'='#7EA172',
'H2_vs_LS_noN_HO'='#C7CB85',
'SW_vs_LS_noN_HO'='#000000'))


my.colors.oxygen <-
scale_color_manual(values=c(
  'LO_vs_HO_HS_N'="#022b3a",
  'LO_vs_HO_H2_noN'="#2a9d8f"))

my.colors.nitrogen <- 
  scale_color_manual(values=c(
    'noN_vs_N_H2_HO'="#264653",
    'noN_vs_N_HS_HO'="#2a9d8f",
    'noN_vs_N_LS_HO'="#e9c46a"))
    
    
my.colors.sulfide_nolowSvsH2 <- 
  scale_color_manual(values=c(
  'LS_vs_HS_noN_HO'='#A982AA',
'LS_vs_HS_N_HO'='#8E4162',
'H2_vs_HS_N_HO'='#404E7C',
'H2_vs_HS_noN_HO'='#6C91BF',
'SW_vs_HS_noN_HO'='#EEA58C'))



my.theme <- theme(axis.title= element_text(face = "plain",size = 20),
                  axis.text = element_text(face = "plain",size = 15, colour = "black"),
                  plot.caption = element_text(hjust = 0),
                  panel.background=element_rect(fill="white"),
                  panel.border=element_rect(colour="black",fill=NA,linewidth = 1),
                  axis.text.x = element_text(angle = 90),
                  axis.line.x.top = element_blank(),
                  )

#####setup for individual comparisons

sig$Threshold<-cut(sig$logFC,breaks=c(-Inf,1,Inf),labels=c("<=1",">1"))

LS_vs_HS_noN_HO <-subset(sig,comparison=='LS_vs_HS_noN_HO')
LS_vs_HS_N_HO <-subset(sig,comparison=='LS_vs_HS_N_HO')
H2_vs_HS_N_HO <-subset(sig,comparison=='H2_vs_HS_N_HO')
H2_vs_HS_noN_HO <-subset(sig,comparison=='H2_vs_HS_noN_HO')
SW_vs_HS_noN_HO <-subset(sig,comparison=='SW_vs_HS_noN_HO')
LO_vs_HO_H2_noN<- subset(sig,comparison=='LO_vs_HO_H2_noN')
LO_vs_HO_HS_N <- subset(sig,comparison=='LO_vs_HO_HS_N')

H2_vs_LS_N_HO<-subset(sig,comparison=='H2_vs_LS_N_HO')
H2_vs_LS_noN_HO<-subset(sig,comparison=='H2_vs_LS_noN_HO')
SW_vs_LS_noN_HO<-subset(sig,comparison=='SW_vs_LS_noN_HO')

noN_vs_N_H2_HO <- subset(sig,comparison=='noN_vs_N_H2_HO')
noN_vs_N_HS_HO <- subset(sig,comparison=='noN_vs_N_HS_HO')
noN_vs_N_LS_HO <- subset(sig,comparison=='noN_vs_N_LS_HO')

my.colors.ind.comparisons <- 
  scale_color_manual(values=c(
  '>1'='#F2444D',
'<=1'='#3BD4CC'))

###### sulfide in gold 


names(sulfide_hub_gold)
## [1] "locusID"   "gene"      "condition" "moduleFil"
names(sulfide_hub_gold)[2] <- "Predicted_function"
names(sulfide_nolowSvsH2)
##  [1] "X.1"                          "locus_ID_andre_new"          
##  [3] "geneid"                       "locus_ID_andre_old"          
##  [5] "start.x"                      "stop.x"                      
##  [7] "gene_letters_andre"           "locus_tag_andre"             
##  [9] "accession_andre"              "start"                       
## [11] "stop"                         "notes"                       
## [13] "Ontology"                     "Inference"                   
## [15] "locus_tag"                    "product"                     
## [17] "protein_id"                   "gene_letters.notes"          
## [19] "NCBI_ID"                      "annotations_from_andre"      
## [21] "target_id"                    "Accession"                   
## [23] "Predicted_function"           "functional_role"             
## [25] "sub_role"                     "sub_role2"                   
## [27] "Protein"                      "Accession...Protein"         
## [29] "seed_ortholog_evalue"         "seed_ortholog_score"         
## [31] "best_tax_level"               "Preferred_name"              
## [33] "GOs"                          "X"                           
## [35] "KEGG_ko"                      "KEGG_Pathway"                
## [37] "KEGG_Module"                  "KEGG_Reaction"               
## [39] "KEGG_rclass"                  "BRITE"                       
## [41] "KEGG_TC"                      "CAZy"                        
## [43] "BiGG_Reaction"                "tax_scope"                   
## [45] "eggNOG_OGs"                   "bestOG"                      
## [47] "COG_Functional_Category"      "eggNOG_free_text_description"
## [49] "logFC"                        "AveExpr"                     
## [51] "t"                            "P.value"                     
## [53] "adj.P.Val"                    "B"                           
## [55] "comparison"
names(sulfide_nolowSvsH2)[2] <- "locusID"

names(sulfide_nolowSvsH2)
##  [1] "X.1"                          "locusID"                     
##  [3] "geneid"                       "locus_ID_andre_old"          
##  [5] "start.x"                      "stop.x"                      
##  [7] "gene_letters_andre"           "locus_tag_andre"             
##  [9] "accession_andre"              "start"                       
## [11] "stop"                         "notes"                       
## [13] "Ontology"                     "Inference"                   
## [15] "locus_tag"                    "product"                     
## [17] "protein_id"                   "gene_letters.notes"          
## [19] "NCBI_ID"                      "annotations_from_andre"      
## [21] "target_id"                    "Accession"                   
## [23] "Predicted_function"           "functional_role"             
## [25] "sub_role"                     "sub_role2"                   
## [27] "Protein"                      "Accession...Protein"         
## [29] "seed_ortholog_evalue"         "seed_ortholog_score"         
## [31] "best_tax_level"               "Preferred_name"              
## [33] "GOs"                          "X"                           
## [35] "KEGG_ko"                      "KEGG_Pathway"                
## [37] "KEGG_Module"                  "KEGG_Reaction"               
## [39] "KEGG_rclass"                  "BRITE"                       
## [41] "KEGG_TC"                      "CAZy"                        
## [43] "BiGG_Reaction"                "tax_scope"                   
## [45] "eggNOG_OGs"                   "bestOG"                      
## [47] "COG_Functional_Category"      "eggNOG_free_text_description"
## [49] "logFC"                        "AveExpr"                     
## [51] "t"                            "P.value"                     
## [53] "adj.P.Val"                    "B"                           
## [55] "comparison"
sulfide_comp_DE <- sulfide_nolowSvsH2[,c(2,3,5,6,7,9:14,16:55)]

shg_vector <- sulfide_hub_gold[,1]
shg_vector
##  [1] "gene-L0Y14_RS01985" "gene-L0Y14_RS07695" "gene-L0Y14_RS00080"
##  [4] "gene-L0Y14_RS12030" "gene-L0Y14_RS12025" "gene-L0Y14_RS12035"
##  [7] "gene-L0Y14_RS11990" "gene-L0Y14_RS11995" "gene-L0Y14_RS12020"
## [10] "gene-L0Y14_RS12015" "gene-L0Y14_RS12045" "gene-L0Y14_RS12000"
## [13] "gene-L0Y14_RS11980" "gene-L0Y14_RS11975" "gene-L0Y14_RS11965"
## [16] "gene-L0Y14_RS12040" "gene-L0Y14_RS11985" "gene-L0Y14_RS12010"
## [19] "gene-L0Y14_RS11255" "gene-L0Y14_RS10315" "gene-L0Y14_RS10410"
## [22] "gene-L0Y14_RS12005"
shg_DE <- subset(sulfide_comp_DE, locusID %in% shg_vector)

shg_DE_names_added <- shg_DE %>% 
  left_join(sulfide_hub_gold, by = "locusID", suffix = c("", "_sulfide_hub_gold"))

names(shg_DE_names_added)
##  [1] "locusID"                             "geneid"                             
##  [3] "start.x"                             "stop.x"                             
##  [5] "gene_letters_andre"                  "accession_andre"                    
##  [7] "start"                               "stop"                               
##  [9] "notes"                               "Ontology"                           
## [11] "Inference"                           "product"                            
## [13] "protein_id"                          "gene_letters.notes"                 
## [15] "NCBI_ID"                             "annotations_from_andre"             
## [17] "target_id"                           "Accession"                          
## [19] "Predicted_function"                  "functional_role"                    
## [21] "sub_role"                            "sub_role2"                          
## [23] "Protein"                             "Accession...Protein"                
## [25] "seed_ortholog_evalue"                "seed_ortholog_score"                
## [27] "best_tax_level"                      "Preferred_name"                     
## [29] "GOs"                                 "X"                                  
## [31] "KEGG_ko"                             "KEGG_Pathway"                       
## [33] "KEGG_Module"                         "KEGG_Reaction"                      
## [35] "KEGG_rclass"                         "BRITE"                              
## [37] "KEGG_TC"                             "CAZy"                               
## [39] "BiGG_Reaction"                       "tax_scope"                          
## [41] "eggNOG_OGs"                          "bestOG"                             
## [43] "COG_Functional_Category"             "eggNOG_free_text_description"       
## [45] "logFC"                               "AveExpr"                            
## [47] "t"                                   "P.value"                            
## [49] "adj.P.Val"                           "B"                                  
## [51] "comparison"                          "Predicted_function_sulfide_hub_gold"
## [53] "condition"                           "moduleFil"
tesstes <- shg_DE_names_added[,c(19,28,52)]
shg_DE2 <- shg_DE_names_added[,c(1:18,20:54)]
names(shg_DE2)
##  [1] "locusID"                             "geneid"                             
##  [3] "start.x"                             "stop.x"                             
##  [5] "gene_letters_andre"                  "accession_andre"                    
##  [7] "start"                               "stop"                               
##  [9] "notes"                               "Ontology"                           
## [11] "Inference"                           "product"                            
## [13] "protein_id"                          "gene_letters.notes"                 
## [15] "NCBI_ID"                             "annotations_from_andre"             
## [17] "target_id"                           "Accession"                          
## [19] "functional_role"                     "sub_role"                           
## [21] "sub_role2"                           "Protein"                            
## [23] "Accession...Protein"                 "seed_ortholog_evalue"               
## [25] "seed_ortholog_score"                 "best_tax_level"                     
## [27] "Preferred_name"                      "GOs"                                
## [29] "X"                                   "KEGG_ko"                            
## [31] "KEGG_Pathway"                        "KEGG_Module"                        
## [33] "KEGG_Reaction"                       "KEGG_rclass"                        
## [35] "BRITE"                               "KEGG_TC"                            
## [37] "CAZy"                                "BiGG_Reaction"                      
## [39] "tax_scope"                           "eggNOG_OGs"                         
## [41] "bestOG"                              "COG_Functional_Category"            
## [43] "eggNOG_free_text_description"        "logFC"                              
## [45] "AveExpr"                             "t"                                  
## [47] "P.value"                             "adj.P.Val"                          
## [49] "B"                                   "comparison"                         
## [51] "Predicted_function_sulfide_hub_gold" "condition"                          
## [53] "moduleFil"
names(shg_DE2)[51] <- "Predicted_function"

#write_csv(shg_DE2,"shg_DE2.csv")

###now I have to reorder this nonsense
list(unique(shg_DE2$locusID))
## [[1]]
##  [1] "gene-L0Y14_RS10410" "gene-L0Y14_RS00080" "gene-L0Y14_RS01985"
##  [4] "gene-L0Y14_RS07695" "gene-L0Y14_RS11995" "gene-L0Y14_RS11990"
##  [7] "gene-L0Y14_RS11985" "gene-L0Y14_RS11965" "gene-L0Y14_RS10315"
## [10] "gene-L0Y14_RS11255" "gene-L0Y14_RS11980" "gene-L0Y14_RS12045"
## [13] "gene-L0Y14_RS12000" "gene-L0Y14_RS11975" "gene-L0Y14_RS12005"
## [16] "gene-L0Y14_RS12015" "gene-L0Y14_RS12020" "gene-L0Y14_RS12010"
## [19] "gene-L0Y14_RS12040" "gene-L0Y14_RS12030" "gene-L0Y14_RS12025"
## [22] "gene-L0Y14_RS12035"
list(unique(shg_DE2$Predicted_function))
## [[1]]
##  [1] "leuC"                               "nitrogen regulation protein NR(II)"
##  [3] "nirB"                               "frdB_1"                            
##  [5] "flxD"                               "hdrA"                              
##  [7] "korC"                               "hdrB"                              
##  [9] "U32 family peptidase"               "DNA repair protein RecN"           
## [11] "korB_1"                             "mdh_1"                             
## [13] "flxC"                               "korA_1"                            
## [15] "flxB"                               "aclB_full"                         
## [17] "aclA"                               "flxA"                              
## [19] "pntB"                               "acnA"                              
## [21] "idh"                                "pntA"
order <- c("hdrB","korA_1","korB_1","korC","hdrA","flxD","flxC","flxB","flxA","aclB_full","aclA","idh","acnA","pntA","pntB","mdh_1","frdB_1","nitrogen regulation protein NR(II)","nirB","leuC","U32 family peptidase", "DNA repair protein RecN")



shg_DE.plot <-ggplot(shg_DE2,
aes(x=factor(Predicted_function,levels = order), y=logFC,color=comparison))

shg_DE.plot +
  ggtitle("Hub genes in gold module")+
  geom_jitter(size=5,shape=18, width = 0.32, height = 0.32)+
  geom_vline(xintercept = c(1.5,2.5,3.5, 4.5, 5.5,6.5,7.5,8.5,9.5,10.5,11.5,12.5,13.5,14.5,15.5,16.5,17.5,18.5,19.5,20.5,21.5), color = "grey") +
  my.colors.sulfide_nolowSvsH2 +
  my.theme 

####now doing this for sulfide_teal, the otehr module that is significant for sulfide
names(sul.teal)
## [1] "locusID"   "gene"      "condition" "moduleFil"
names(sul.teal)[2] <- "Predicted_function"

sht_vector <- sul.teal[,1]
#sht_vector
sht_DE <- subset(sulfide_comp_DE, locusID %in% sht_vector)
#names(sht_DE)

sht_DE_names_added <- sht_DE %>% 
  left_join(sul.teal, by = "locusID", suffix = c("", "_sulfide_hub_teal"))

#names(sht_DE_names_added)
sht_DE2 <- sht_DE_names_added[,c(1:18,20:54)]
#names(sht_DE2)
names(sht_DE2)[51] <- "Predicted_function"


names_shtDE <- unique(sht_DE$locusID)

setdiff(names_shtDE,sht_vector)
## character(0)
### shortening some of the names

#write.csv(sht_DE2,"sht_DE2.csv")
getwd()
## [1] "/Users/jessicamitchell/Library/CloudStorage/OneDrive-HarvardUniversity/OD_documents/Cfixpaper"
###pulling up dataframe with gene names and functions added
sht_DE3 <- read.csv("sht_DE2.csv")



sht_DE3.plot <-ggplot(sht_DE3,
aes(x=factor(Preferred_name), y=logFC,color=comparison))

sht_DE3.plot +
  ggtitle("Hub genes in teal module")+
  geom_point(size=5,shape=18) +
  my.colors.sulfide_nolowSvsH2 +
  my.theme 

###Going to have to add my own short names to make above figure run right...
###### oxygen in purple 

names(oxy_purple)
## [1] "locusID"   "gene"      "condition" "moduleFil"
names(oxy_purple)[2] <- "Predicted_function"

names(oxygen)[2] <- "locusID"

ohp_vector <- oxy_purple[,1]
#ohp_vector
ohp_DE <- subset(oxygen, locusID %in% ohp_vector)
#names(ohp_DE)

###just the locusID and Preferred_name 

ohp_DE1 <- ohp_DE[,c(1,6,28)]
ohp_DE2 <- unique(ohp_DE1)
ohp3 <- merge(ohp_DE2, oxy_purple)

names(ohp3)[3] <- "short.names"

#names(ohp_DE)
#write_csv(ohp3, "ohp4.csv")
#added short.names and notes
ohp4 <- read.csv("ohp4.csv")

ohp_DE3 <- merge(ohp_DE,ohp4,  by = "locusID")

ohp_DE.plot <-ggplot(ohp_DE3,
aes(x=factor(short.names), y=logFC,color=comparison))





ohp_DE.plot +
  ggtitle("Hub genes in purple module")+
  geom_point(size=5,shape=18) +
  my.colors.oxygen +
  my.theme 

###### oxygen in black

#names(oxy_b)

names(oxy_b)[2] <- "Predicted_function"



ohb_vector <- oxy_b[,1]
ohb_vector
##  [1] "gene-L0Y14_RS14085" "gene-L0Y14_RS12670" "gene-L0Y14_RS08400"
##  [4] "gene-L0Y14_RS05880" "gene-L0Y14_RS13180" "gene-L0Y14_RS15950"
##  [7] "gene-L0Y14_RS14595" "gene-L0Y14_RS13130" "gene-L0Y14_RS14970"
## [10] "gene-L0Y14_RS03820" "gene-L0Y14_RS07270" "gene-L0Y14_RS04045"
## [13] "gene-L0Y14_RS03375" "gene-L0Y14_RS14700" "gene-L0Y14_RS11330"
## [16] "gene-L0Y14_RS15890" "gene-L0Y14_RS06760" "gene-L0Y14_RS00450"
## [19] "gene-L0Y14_RS08420" "gene-L0Y14_RS10875" "gene-L0Y14_RS00105"
## [22] "gene-L0Y14_RS01330" "gene-L0Y14_RS16130" "gene-L0Y14_RS02180"
## [25] "gene-L0Y14_RS08830" "gene-L0Y14_RS06620" "gene-L0Y14_RS01145"
## [28] "gene-L0Y14_RS11140" "gene-L0Y14_RS14660" "gene-L0Y14_RS09840"
## [31] "gene-L0Y14_RS03075" "gene-L0Y14_RS06585" "gene-L0Y14_RS08700"
ohb_DE <- subset(oxygen, locusID %in% ohb_vector)
#names(ohb_DE)

###just the locusID,accession and Preferred_name 

ohb <- ohb_DE[,c(2,9,20,32)]

ohb <- unique(ohb)
names(ohb)[4] <- "short.names"


#write_csv(ohb, "ohb2.csv")
#added short.names and notes
ohb2 <- read.csv("ohb2.csv")

ohb2_DE3 <- merge(ohb_DE,ohb2,  by = "locusID")

ohb2_DE3.plot <-ggplot(ohb2_DE3,
aes(x=factor(short.names), y=logFC,color=comparison))





ohb2_DE3.plot +
  ggtitle("Hub genes in black module")+
  geom_point(size=5,shape=18) +
  my.colors.oxygen +
  my.theme 

average logFCs

######putting all of this together, so that I have a workable file I can use in adobe illustrator

all_together <- read.csv("wgcna_trait_module_hub_genes.csv")
names(all_together)
## [1] "locusID"       "short.names"   "moduleFil"     "func_category"
## [5] "X"
all_together <- all_together[,1:4]



shg_DE_av <- shg_DE2[,c(1,44,50,53)]

head(shg_DE_av)
##              locusID     logFC      comparison      moduleFil
## 1 gene-L0Y14_RS10410 -6.607549 LS_vs_HS_noN_HO darkgoldenrod1
## 2 gene-L0Y14_RS00080 -3.742340 LS_vs_HS_noN_HO darkgoldenrod1
## 3 gene-L0Y14_RS01985 -5.245126 LS_vs_HS_noN_HO darkgoldenrod1
## 4 gene-L0Y14_RS07695  4.361391 LS_vs_HS_noN_HO darkgoldenrod1
## 5 gene-L0Y14_RS11995  2.598792 LS_vs_HS_noN_HO darkgoldenrod1
## 6 gene-L0Y14_RS11990  2.739793 LS_vs_HS_noN_HO darkgoldenrod1
# Calculate the average logFC for each locusID in gold sulfide 

avg_logFC_df <- shg_DE_av %>%
  group_by(locusID, moduleFil) %>%
  summarise(
    avg_logFC = mean(logFC, na.rm = TRUE),
    sd_logFC = sd(logFC, na.rm = TRUE)
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
avg_logFC_df <- shg_DE_av %>%
  group_by(locusID, moduleFil) %>%
  summarise(
    n = n(), # Add the count of non-NA values
    avg_logFC = mean(logFC, na.rm = TRUE),
    sd_logFC = sd(logFC, na.rm = TRUE)
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 5
##   locusID            moduleFil          n avg_logFC sd_logFC
##   <chr>              <chr>          <int>     <dbl>    <dbl>
## 1 gene-L0Y14_RS00080 darkgoldenrod1     3     -4.58    0.732
## 2 gene-L0Y14_RS01985 darkgoldenrod1     3     -5.89    0.580
## 3 gene-L0Y14_RS07695 darkgoldenrod1     4      5.48    1.11 
## 4 gene-L0Y14_RS10315 darkgoldenrod1     3      2.38    0.487
## 5 gene-L0Y14_RS10410 darkgoldenrod1     3     -5.97    0.737
## 6 gene-L0Y14_RS11255 darkgoldenrod1     4      2.70    0.991
avg_logFC_df <- avg_logFC_df %>%
  mutate(treatment_logFC_ave = "logFC_sulfide_limiting")

# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 6
##   locusID            moduleFil          n avg_logFC sd_logFC treatment_logFC_ave
##   <chr>              <chr>          <int>     <dbl>    <dbl> <chr>              
## 1 gene-L0Y14_RS00080 darkgoldenrod1     3     -4.58    0.732 logFC_sulfide_limi…
## 2 gene-L0Y14_RS01985 darkgoldenrod1     3     -5.89    0.580 logFC_sulfide_limi…
## 3 gene-L0Y14_RS07695 darkgoldenrod1     4      5.48    1.11  logFC_sulfide_limi…
## 4 gene-L0Y14_RS10315 darkgoldenrod1     3      2.38    0.487 logFC_sulfide_limi…
## 5 gene-L0Y14_RS10410 darkgoldenrod1     3     -5.97    0.737 logFC_sulfide_limi…
## 6 gene-L0Y14_RS11255 darkgoldenrod1     4      2.70    0.991 logFC_sulfide_limi…
shg_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")

#write.csv(shg_DE_av2,"shg_DE_av2.csv")
dim(shg_DE_av2)
## [1] 22  9
n_val_sug <- shg_DE_av2[,c(1,3)]

####added "broad_category" column
shg_DE_av3 <- read.csv("shg_DE_av2.csv")
dim(shg_DE_av3)
## [1] 22 10
####now doing this for the teal sulfide module 
#names(sht_DE3)
v <- unique(sht_DE3$locusID)
sht_DE3_av <- sht_DE3[,c(2,45,51,54)]

head(sht_DE3_av)
##              locusID     logFC      comparison moduleFil
## 1 gene-L0Y14_RS00400 -5.671978 LS_vs_HS_noN_HO      teal
## 2 gene-L0Y14_RS01755 -1.093415 LS_vs_HS_noN_HO      teal
## 3 gene-L0Y14_RS04610 -1.158590 LS_vs_HS_noN_HO      teal
## 4 gene-L0Y14_RS05075 -1.192507 LS_vs_HS_noN_HO      teal
## 5 gene-L0Y14_RS05080 -2.083575 LS_vs_HS_noN_HO      teal
## 6 gene-L0Y14_RS07060 -1.072405 LS_vs_HS_noN_HO      teal
avg_logFC_df <- sht_DE3_av %>%
  group_by(locusID, moduleFil) %>%
  summarise(
    avg_logFC = mean(logFC, na.rm = TRUE),
    sd_logFC = sd(logFC, na.rm = TRUE)
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
avg_logFC_df <- sht_DE3_av %>%
  group_by(locusID, moduleFil) %>%
  summarise(
    n = n(), # Add the count of non-NA values
    avg_logFC = mean(logFC, na.rm = TRUE),
    sd_logFC = sd(logFC, na.rm = TRUE)
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 5
##   locusID            moduleFil     n avg_logFC sd_logFC
##   <chr>              <chr>     <int>     <dbl>    <dbl>
## 1 gene-L0Y14_RS00400 teal          4     -6.35   0.877 
## 2 gene-L0Y14_RS00515 teal          3      1.86   0.534 
## 3 gene-L0Y14_RS00525 teal          4      2.52   0.570 
## 4 gene-L0Y14_RS00625 teal          3      1.86   0.534 
## 5 gene-L0Y14_RS00655 teal          4      2.62   0.768 
## 6 gene-L0Y14_RS01660 teal          3     -2.42   0.0345
avg_logFC_df <- avg_logFC_df %>%
  mutate(treatment_logFC_ave = "logFC_sulfide_limiting")

# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 6
##   locusID            moduleFil     n avg_logFC sd_logFC treatment_logFC_ave   
##   <chr>              <chr>     <int>     <dbl>    <dbl> <chr>                 
## 1 gene-L0Y14_RS00400 teal          4     -6.35   0.877  logFC_sulfide_limiting
## 2 gene-L0Y14_RS00515 teal          3      1.86   0.534  logFC_sulfide_limiting
## 3 gene-L0Y14_RS00525 teal          4      2.52   0.570  logFC_sulfide_limiting
## 4 gene-L0Y14_RS00625 teal          3      1.86   0.534  logFC_sulfide_limiting
## 5 gene-L0Y14_RS00655 teal          4      2.62   0.768  logFC_sulfide_limiting
## 6 gene-L0Y14_RS01660 teal          3     -2.42   0.0345 logFC_sulfide_limiting
sht_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")

dim(sht_DE_av2)
## [1] 50  9
names(sht_DE_av2)
## [1] "locusID"             "moduleFil.x"         "n"                  
## [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
## [7] "short.names"         "moduleFil.y"         "func_category"
sht_n <- sht_DE_av2[,c(1,3)]

#write.csv(sht_DE_av2,"sht_DE_av2.csv")
#write.csv(sht_DE_av2,"sht_DE_av5.csv")
###added broad categories so I can use 
sht_DE_av3_old <- read.csv("sht_DE_av3.csv")
sht_DE_av5_new <- read.csv("sht_DE_av5.csv")


names(sht_DE_av5_new)
##  [1] "X"                   "locusID"             "moduleFil.x"        
##  [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
##  [7] "short.names"         "moduleFil.y"         "func_category"      
## [10] "broad_category"
names(sht_DE_av3_old)
## [1] "X"                   "locusID"             "moduleFil.x"        
## [4] "avg_logFC"           "treatment_logFC_ave" "short.names"        
## [7] "moduleFil.y"         "func_category"       "broad_category"
vec1 <- (sht_DE_av3_old)[2]
vec2 <- (sht_DE_av5_new)[2]
differences <- anti_join(vec1,vec2)
## Joining with `by = join_by(locusID)`
whatdiff <- merge(differences,sht_DE_av3_old, by = "locusID", all.x = TRUE)
whatdiff$short.names
##  [1] "xerC"              "Na_H_Exchanger"    "pth"              
##  [4] "dsrE_3"            "AAA_LonB"          "dxs"              
##  [7] "hypo"              "gsiA"              "recQ"             
## [10] "glyco_like_mftF " "tmk"               "frdA_1"           
## [13] "ppdk"              "cvpA"              "hflC"             
## [16] "YdcH"              "malQ"              "HpcH"             
## [19] "recD"              "gspB"              "trxA"             
## [22] "glgP"              "urvD"              "yidC"             
## [25] "yidD"
sht_DE_av3_old_filtered <-  sht_DE_av3_old %>%
  filter(locusID %in% unlist(vec2))

names(sht_DE_av5_new)
##  [1] "X"                   "locusID"             "moduleFil.x"        
##  [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
##  [7] "short.names"         "moduleFil.y"         "func_category"      
## [10] "broad_category"
sht_DE_new_justSD <- sht_DE_av5_new[,c(2,5)]
sht_DE_new <- merge(sht_DE_av3_old_filtered,sht_DE_new_justSD, by= "locusID",all.x = TRUE)




head(sht_DE_new)
##              locusID X moduleFil.x avg_logFC    treatment_logFC_ave short.names
## 1 gene-L0Y14_RS00400 2        teal -6.353622 logFC_sulfide_limiting        torF
## 2 gene-L0Y14_RS00515 3        teal  1.860711 logFC_sulfide_limiting        norB
## 3 gene-L0Y14_RS00525 4        teal  2.519077 logFC_sulfide_limiting        nirS
## 4 gene-L0Y14_RS00625 5        teal  1.860711 logFC_sulfide_limiting       norB1
## 5 gene-L0Y14_RS00655 6        teal  2.618491 logFC_sulfide_limiting       norD2
## 6 gene-L0Y14_RS01660 9        teal -2.423910 logFC_sulfide_limiting        psrA
##                       moduleFil.y           func_category broad_category
## 1 sulfide_teal_nitro_midnightblue                   porin      transport
## 2                    sulfide_teal         denitrification       nitrogen
## 3                    sulfide_teal         denitrification       nitrogen
## 4                    sulfide_teal         denitrification       nitrogen
## 5                    sulfide_teal         denitrification       nitrogen
## 6                    sulfide_teal nucleotide biosynthesis   biosynthesis
##     sd_logFC
## 1 0.87692331
## 2 0.53357947
## 3 0.57017737
## 4 0.53357947
## 5 0.76846294
## 6 0.03451484
#write.csv(sht_DE_new,"sht_DE_new.csv")

# Filter out 'unknown'
sht_DE_new_filtered <- sht_DE_new %>%
  filter(broad_category != "unknown")

names(sht_DE_new_filtered)
##  [1] "locusID"             "X"                   "moduleFil.x"        
##  [4] "avg_logFC"           "treatment_logFC_ave" "short.names"        
##  [7] "moduleFil.y"         "func_category"       "broad_category"     
## [10] "sd_logFC"
names(shg_DE_av3)
##  [1] "X"                   "locusID"             "moduleFil.x"        
##  [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
##  [7] "short.names"         "moduleFil.y"         "func_category"      
## [10] "broad_category"
sht_DE <- sht_DE_new_filtered[,c(1,3:10)]
shg_DE <- shg_DE_av3[,c(2:10)]

dim(shg_DE)
## [1] 22  9
dim(sht_DE)
## [1] 42  9
gold_teal <- rbind(sht_DE,shg_DE)





gold_teal_filtered <- gold_teal %>%
  filter(broad_category != "unknown")


######putting them all together




 #reordering the gene names so they appear on th xaxis in the right order
sht_DE_new_filtered$short.names
##  [1] "torF"       "norB"       "nirS"       "norB1"      "norD2"     
##  [6] "psrA"       "fusA"       "trmD"       "ibpA"       "pyrG"      
## [11] "ftsH"       "rimP"       "nusA"       "fusA2"      "napG"      
## [16] "moaA"       "fusA_2"     "holA"       "hupE"       "HyiA_small"
## [21] "isp1"       "isp2"       "HyiB_large" "frdB_1"     "frdC_1"    
## [26] "htpX"       "cmoB"       "korB_2"     "korA_2"     "rhlE"      
## [31] "dnaJ"       "pfor_3"     "ispB"       "porin"      "clpB"      
## [36] "rho"        "ptsP_E1"    "feoB"       "mcrA"       "korA_3"    
## [41] "korB_3"     "pfor_like"
desired_order <- c("moaA","psrA","ispB" ,"pyrG","frdB_1","frdC_1","korA_2","korB_2","korA_3","korB_3", "pfor_3","pfor_like","fusA","fusA_2","fusA2","trmD","ibpA","nusA","mcrA" ,"recQ","holA","cmoB","dnaJ","ftsH","rimP","rhlE","rho","htpX","clpB","hupE","HyiB_large","isp1","isp2","HyiA_small","napG","nirS","norB" ,"norB1","norD2","feoB","porin","ptsP_E1", "torF")




sht_DE_new_filtered$short.names <- factor(sht_DE_new_filtered$short.names, levels = desired_order)

                                                    
                                                   
                                                   
                                                            
                                                           
                                                   
                                                              
                                                              
                                                              
                                                        
                     
 
sht_DE_new_filtered$clipped_avg_logFC <- pmin(pmax(sht_DE_new_filtered$avg_logFC, -4), 4)




ggplot(sht_DE_new_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+ # specify breaks within this range

  labs(title = "Average logFC under sulfide limiting conditions",
       x = "Genes",
       y = "Average LogFC",
       color = "Avg LogFC Value") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  facet_grid(~broad_category,scales = "free",space="free")

######putting them all together
names(sht_DE_new_filtered)
##  [1] "locusID"             "X"                   "moduleFil.x"        
##  [4] "avg_logFC"           "treatment_logFC_ave" "short.names"        
##  [7] "moduleFil.y"         "func_category"       "broad_category"     
## [10] "sd_logFC"            "clipped_avg_logFC"
names(shg_DE_av3)
##  [1] "X"                   "locusID"             "moduleFil.x"        
##  [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
##  [7] "short.names"         "moduleFil.y"         "func_category"      
## [10] "broad_category"
sht_DE <- sht_DE_new_filtered[,c(1,3:10)]
shg_DE <- shg_DE_av3[,c(2:10)]

dim(shg_DE)
## [1] 22  9
dim(sht_DE)
## [1] 42  9
gold_teal <- rbind(sht_DE,shg_DE)

####breaking up this figure by positive and negative fold change
head(gold_teal)
##              locusID moduleFil.x avg_logFC    treatment_logFC_ave short.names
## 1 gene-L0Y14_RS00400        teal -6.353622 logFC_sulfide_limiting        torF
## 2 gene-L0Y14_RS00515        teal  1.860711 logFC_sulfide_limiting        norB
## 3 gene-L0Y14_RS00525        teal  2.519077 logFC_sulfide_limiting        nirS
## 4 gene-L0Y14_RS00625        teal  1.860711 logFC_sulfide_limiting       norB1
## 5 gene-L0Y14_RS00655        teal  2.618491 logFC_sulfide_limiting       norD2
## 6 gene-L0Y14_RS01660        teal -2.423910 logFC_sulfide_limiting        psrA
##                       moduleFil.y           func_category broad_category
## 1 sulfide_teal_nitro_midnightblue                   porin      transport
## 2                    sulfide_teal         denitrification       nitrogen
## 3                    sulfide_teal         denitrification       nitrogen
## 4                    sulfide_teal         denitrification       nitrogen
## 5                    sulfide_teal         denitrification       nitrogen
## 6                    sulfide_teal nucleotide biosynthesis   biosynthesis
##     sd_logFC
## 1 0.87692331
## 2 0.53357947
## 3 0.57017737
## 4 0.53357947
## 5 0.76846294
## 6 0.03451484
 ##Filter for avg_logFC > 0
sulfide_up <- gold_teal[gold_teal$avg_logFC > 0, ]

# Filter for avg_logFC < 0
sulfide_down <- gold_teal[gold_teal$avg_logFC < 0, ]


# Filter out 'unknown'
sulfide_up_filtered <- sulfide_up %>%
  filter(broad_category != "unknown")

sulfide_down_filtered <- sulfide_down %>%
  filter(broad_category != "unknown")

dim(sulfide_up_filtered)
## [1] 46  9
###removing the second frdB, since it was just sig for both the gold and the teal, it has the same fold change and everything...
sulfide_up_filtered <- sulfide_up_filtered[-28,]
###makes a clipping for the color (so its not all washed out)

sulfide_up_filtered$clipped_avg_logFC <- pmin(pmax(sulfide_up_filtered$avg_logFC, -4), 4)

sulfide_down_filtered$clipped_avg_logFC <- pmin(pmax(sulfide_down_filtered$avg_logFC, -4), 4)

#write.csv(sulfide_up_filtered,"sulfide_up_filtered2.csv")
##reordered in excel, faster
SG_short.name_order <- read.csv("gold_teal_up_short.name.order.csv")

dim(SG_short.name_order)
## [1] 45 11
dim(sulfide_up_filtered)
## [1] 45 10
desired_order <- SG_short.name_order[,6]
list(desired_order)
## [[1]]
##  [1] "norB"       "nirS"       "norB1"      "norD2"      "napG"      
##  [6] "hupE"       "HyiA_small" "isp1"       "isp2"       "HyiB_large"
## [11] "frdB_1"     "frdC_1"     "korB_2"     "korA_2"     "pfor_3"    
## [16] "hdrB"       "korA_1"     "korB_1"     "korC"       "hdrA"      
## [21] "flxD"       "flxC"       "flxB"       "flxA"       "aclB_full" 
## [26] "aclA"       "idh"        "acnA"       "pntA"       "pntB"      
## [31] "mdh_1"      "ibpA"       "ftsH"       "fusA2"      "holA"      
## [36] "htpX"       "cmoB"       "dnaJ"       "porin"      "clpB"      
## [41] "peptidase"  "recN"       "ptsP_E1"    "feoB"       "moaA"
sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = desired_order)
levels(sulfide_up_filtered$short.names)
##  [1] "norB"       "nirS"       "norB1"      "norD2"      "napG"      
##  [6] "hupE"       "HyiA_small" "isp1"       "isp2"       "HyiB_large"
## [11] "frdB_1"     "frdC_1"     "korB_2"     "korA_2"     "pfor_3"    
## [16] "hdrB"       "korA_1"     "korB_1"     "korC"       "hdrA"      
## [21] "flxD"       "flxC"       "flxB"       "flxA"       "aclB_full" 
## [26] "aclA"       "idh"        "acnA"       "pntA"       "pntB"      
## [31] "mdh_1"      "ibpA"       "ftsH"       "fusA2"      "holA"      
## [36] "htpX"       "cmoB"       "dnaJ"       "porin"      "clpB"      
## [41] "peptidase"  "recN"       "ptsP_E1"    "feoB"       "moaA"
sulfide_up_filtered$short.names <- as.factor(sulfide_up_filtered$short.names)
str(sulfide_up_filtered)
## 'data.frame':    45 obs. of  10 variables:
##  $ locusID            : chr  "gene-L0Y14_RS00515" "gene-L0Y14_RS00525" "gene-L0Y14_RS00625" "gene-L0Y14_RS00655" ...
##  $ moduleFil.x        : chr  "teal" "teal" "teal" "teal" ...
##  $ avg_logFC          : num  1.86 2.52 1.86 2.62 4.09 ...
##  $ treatment_logFC_ave: chr  "logFC_sulfide_limiting" "logFC_sulfide_limiting" "logFC_sulfide_limiting" "logFC_sulfide_limiting" ...
##  $ short.names        : Factor w/ 45 levels "norB","nirS",..: 1 2 3 4 32 33 34 5 45 35 ...
##  $ moduleFil.y        : chr  "sulfide_teal" "sulfide_teal" "sulfide_teal" "sulfide_teal" ...
##  $ func_category      : chr  "denitrification" "denitrification" "denitrification" "denitrification" ...
##  $ broad_category     : chr  "nitrogen" "nitrogen" "nitrogen" "nitrogen" ...
##  $ sd_logFC           : num  0.534 0.57 0.534 0.768 1.564 ...
##  $ clipped_avg_logFC  : num  1.86 2.52 1.86 2.62 4 ...
sulfide_up_filtered$short.names
##  [1] norB       nirS       norB1      norD2      ibpA       ftsH      
##  [7] fusA2      napG       moaA       holA       hupE       HyiA_small
## [13] isp1       isp2       HyiB_large frdB_1     frdC_1     htpX      
## [19] cmoB       korB_2     korA_2     dnaJ       pfor_3     porin     
## [25] clpB       ptsP_E1    feoB       peptidase  recN       hdrB      
## [31] korA_1     korB_1     korC       hdrA       flxD       flxC      
## [37] flxB       flxA       aclB_full  aclA       idh        acnA      
## [43] pntA       pntB       mdh_1     
## 45 Levels: norB nirS norB1 norD2 napG hupE HyiA_small isp1 isp2 ... moaA
##order of the genes in sulfide_up
sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = desired_order)


# Extend the desired order with genes from sulfide_down that might not be in desired_order
extended_order <- unique(c(desired_order, as.character(sulfide_down_filtered$short.names)))
list(extended_order)
## [[1]]
##  [1] "norB"       "nirS"       "norB1"      "norD2"      "napG"      
##  [6] "hupE"       "HyiA_small" "isp1"       "isp2"       "HyiB_large"
## [11] "frdB_1"     "frdC_1"     "korB_2"     "korA_2"     "pfor_3"    
## [16] "hdrB"       "korA_1"     "korB_1"     "korC"       "hdrA"      
## [21] "flxD"       "flxC"       "flxB"       "flxA"       "aclB_full" 
## [26] "aclA"       "idh"        "acnA"       "pntA"       "pntB"      
## [31] "mdh_1"      "ibpA"       "ftsH"       "fusA2"      "holA"      
## [36] "htpX"       "cmoB"       "dnaJ"       "porin"      "clpB"      
## [41] "peptidase"  "recN"       "ptsP_E1"    "feoB"       "moaA"      
## [46] "torF"       "psrA"       "fusA"       "trmD"       "pyrG"      
## [51] "rimP"       "nusA"       "fusA_2"     "rhlE"       "ispB"      
## [56] "rho"        "mcrA"       "korA_3"     "korB_3"     "pfor_like" 
## [61] "glnL"       "nirB"       "leuC"
# Set factor levels for both datasets based on the extended order
sulfide_up_filtered$short.names <- factor(sulfide_up_filtered$short.names, levels = extended_order)
sulfide_down_filtered$short.names <- factor(sulfide_down_filtered$short.names, levels = extended_order)




####sulfide_up plot#################################################################
sulfide_up_plot <- ggplot(sulfide_up_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+ 

  labs(title = "Average logFC under sulfide limiting conditions",
       x = "Genes",
       y = "Average LogFC",
       color = "Avg LogFC Value") +
  theme_minimal() +
 
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  facet_grid(~broad_category,scales = "free",space="free")







####sulfide_down plot#################################################################
sulfide_down_plot <- ggplot(sulfide_down_filtered, aes(x = short.names, y = avg_logFC, color = clipped_avg_logFC)) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = avg_logFC - sd_logFC, ymax = avg_logFC + sd_logFC), width = 0.25) +
  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+ 

  labs(title = "Average logFC under sulfide limiting conditions",
       x = "Genes",
       y = "Average LogFC",
       color = "Avg LogFC Value") +
  theme_minimal() +
  
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  facet_grid(~broad_category,scales = "free",space="free")



names(sulfide_up_filtered)
##  [1] "locusID"             "moduleFil.x"         "avg_logFC"          
##  [4] "treatment_logFC_ave" "short.names"         "moduleFil.y"        
##  [7] "func_category"       "broad_category"      "sd_logFC"           
## [10] "clipped_avg_logFC"
names(sulfide_down_filtered)
##  [1] "locusID"             "moduleFil.x"         "avg_logFC"          
##  [4] "treatment_logFC_ave" "short.names"         "moduleFil.y"        
##  [7] "func_category"       "broad_category"      "sd_logFC"           
## [10] "clipped_avg_logFC"
# Combine the datasets with an identifier for each
sulfide_up_filtered$set <- "Up"
sulfide_down_filtered$set <- "Down"

combined_dataS <- rbind(sulfide_up_filtered, sulfide_down_filtered)
# Reverse the levels so that "Up" comes before "Down"
combined_dataS$set <- factor(combined_dataS$set, levels = c("Up", "Down"))

combined_dataS <- combined_dataS %>%
  mutate(adj_avg_logFC = case_when(
    set == "Up" & avg_logFC < 0 ~ 0,
    set == "Down" & avg_logFC > 0 ~ 0,
    TRUE ~ avg_logFC
  ))

# Plotting
p <- ggplot(combined_dataS, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
  geom_point(size = 3) +
  geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
  labs(title = "Average logFC under sulfide limiting conditions",
       x = "Adjusted Average LogFC",
       y = "Genes",
       color = "Avg LogFC Value") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_grid(cols = vars(set), rows= vars(broad_category), scales = "free_y", space = "free_y")

print(p)

################################OXYGEN PURPLE ANDBLACK##################################################

#####now doing this for purple oxy

#names(ohp_DE3)
table(ohp_DE3$short.names)
## 
##   bamD   cphA   cvpA  dprE1    FBP  fusA2   GH16   hupE   hybO   hypE   hypo 
##      2      2      1      2      1      2      1      2      2      2      8 
##   isp2   mrcA   nifJ   ppdK   rocR SgpA_1   tsaC   yidC   yidD 
##      2      2      2      2      2      2      2      2      1
ohp_DE3_av <- ohp_DE3[,c(1,49,55,59,57)]

head(ohp_DE3_av)
##              locusID     logFC      comparison moduleFil short.names
## 1 gene-L0Y14_RS05880 -4.039158   LO_vs_HO_HS_N    purple        hypo
## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN    purple        hypo
## 3 gene-L0Y14_RS06635 -5.296420   LO_vs_HO_HS_N    purple       dprE1
## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN    purple       dprE1
## 5 gene-L0Y14_RS06820  2.418749 LO_vs_HO_H2_noN    purple       fusA2
## 6 gene-L0Y14_RS06820  2.176861   LO_vs_HO_HS_N    purple       fusA2
####dealing with the fact I have more than one genes that are called "hypo"


# Assign a unique number for each distinct locusID with "hypo" genes
locus_mapping <- ohp_DE3_av %>%
  filter(short.names == "hypo") %>%
  distinct(locusID) %>%
  mutate(hypo_num = row_number())

# Join this with the original dataframe and replace "hypo" genes
new_df <- ohp_DE3_av %>%
  left_join(locus_mapping, by = "locusID") %>%
  mutate(short.names = ifelse(!is.na(hypo_num) & short.names == "hypo",
                       paste0("hypo", hypo_num),
                       short.names)) %>%
  select(-hypo_num)  # Remove the auxiliary column

# Print the result
head(new_df)
##              locusID     logFC      comparison moduleFil short.names
## 1 gene-L0Y14_RS05880 -4.039158   LO_vs_HO_HS_N    purple       hypo1
## 2 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN    purple       hypo1
## 3 gene-L0Y14_RS06635 -5.296420   LO_vs_HO_HS_N    purple       dprE1
## 4 gene-L0Y14_RS06635 -4.281582 LO_vs_HO_H2_noN    purple       dprE1
## 5 gene-L0Y14_RS06820  2.418749 LO_vs_HO_H2_noN    purple       fusA2
## 6 gene-L0Y14_RS06820  2.176861   LO_vs_HO_HS_N    purple       fusA2
pox <- new_df




avg_logFC_df <- new_df %>%
  group_by(locusID, moduleFil) %>%
  summarise(avg_logFC = mean(logFC, na.rm = TRUE),
            sd_logFC = sd(logFC, na.rm = TRUE)) %>%
  ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
###adding the n value too
avg_logFC_df <- new_df %>%
  group_by(locusID, moduleFil) %>%
  summarise(
    n = n(), # Add the count of non-NA values
    avg_logFC = mean(logFC, na.rm = TRUE),
    sd_logFC = sd(logFC, na.rm = TRUE)
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 5
##   locusID            moduleFil     n avg_logFC sd_logFC
##   <chr>              <chr>     <int>     <dbl>    <dbl>
## 1 gene-L0Y14_RS05880 purple        2     -3.85    0.270
## 2 gene-L0Y14_RS06635 purple        2     -4.79    0.718
## 3 gene-L0Y14_RS06820 purple        2      2.30    0.171
## 4 gene-L0Y14_RS07045 purple        2      2.42    0.107
## 5 gene-L0Y14_RS07050 purple        2      4.01    1.56 
## 6 gene-L0Y14_RS07420 purple        2      3.16    0.175
avg_logFC_df <- avg_logFC_df %>%
  mutate(treatment_logFC_ave = "logFC_oxygen_limiting")

# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 6
##   locusID            moduleFil     n avg_logFC sd_logFC treatment_logFC_ave  
##   <chr>              <chr>     <int>     <dbl>    <dbl> <chr>                
## 1 gene-L0Y14_RS05880 purple        2     -3.85    0.270 logFC_oxygen_limiting
## 2 gene-L0Y14_RS06635 purple        2     -4.79    0.718 logFC_oxygen_limiting
## 3 gene-L0Y14_RS06820 purple        2      2.30    0.171 logFC_oxygen_limiting
## 4 gene-L0Y14_RS07045 purple        2      2.42    0.107 logFC_oxygen_limiting
## 5 gene-L0Y14_RS07050 purple        2      4.01    1.56  logFC_oxygen_limiting
## 6 gene-L0Y14_RS07420 purple        2      3.16    0.175 logFC_oxygen_limiting
ohp_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")

names(ohp_DE_av2)
## [1] "locusID"             "moduleFil.x"         "n"                  
## [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
## [7] "short.names"         "moduleFil.y"         "func_category"
poxn <- ohp_DE_av2[,c(1,3)]


######now doing this with oxy black#################



#names(ohb2_DE3)
ohb2_DE3_av <- ohb2_DE3[,c(1,49,55,59,57)]

head(ohb2_DE3_av)
##              locusID     logFC      comparison moduleFil3 short.names
## 1 gene-L0Y14_RS00450 -2.998400   LO_vs_HO_HS_N  black_oxy          Hr
## 2 gene-L0Y14_RS03075 -4.498757   LO_vs_HO_HS_N  black_oxy        hypo
## 3 gene-L0Y14_RS03375 -3.606201   LO_vs_HO_HS_N  black_oxy        fabA
## 4 gene-L0Y14_RS03820 -4.795972   LO_vs_HO_HS_N  black_oxy         HIT
## 5 gene-L0Y14_RS04045 -3.506534   LO_vs_HO_HS_N  black_oxy        cbbQ
## 6 gene-L0Y14_RS05880 -3.657596 LO_vs_HO_H2_noN  black_oxy        hypo
names(ohb2_DE3_av)[4] <- "moduleFil"





# again, I have the hypo issue
# Assign a unique number for each distinct locusID with "hypo" genes
locus_mapping <- ohb2_DE3_av %>%
  filter(short.names == "hypo") %>%
  distinct(locusID) %>%
  mutate(hypo_num = row_number())

# Join this with the original dataframe and replace "hypo" genes
new_df <- ohb2_DE3_av %>%
  left_join(locus_mapping, by = "locusID") %>%
  mutate(short.names = ifelse(!is.na(hypo_num) & short.names == "hypo",
                       paste0("hypo", hypo_num),
                       short.names)) %>%
  select(-hypo_num)  # Remove the auxiliary column

box <- new_df



avg_logFC_df <- new_df %>%
  group_by(locusID, moduleFil) %>%
  summarise(avg_logFC = mean(logFC, na.rm = TRUE),
            sd_logFC = sd(logFC, na.rm = TRUE)) %>%
  ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
avg_logFC_df <- new_df %>%
  group_by(locusID, moduleFil) %>%
  summarise(
    n = n(), # Add the count of non-NA values
    avg_logFC = mean(logFC, na.rm = TRUE),
    sd_logFC = sd(logFC, na.rm = TRUE)
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'locusID'. You can override using the
## `.groups` argument.
avg_logFC_df <- avg_logFC_df %>%
  mutate(treatment_logFC_ave = "logFC_oxygen_limiting")

# Print the result
head(avg_logFC_df)
## # A tibble: 6 × 6
##   locusID            moduleFil     n avg_logFC sd_logFC treatment_logFC_ave  
##   <chr>              <chr>     <int>     <dbl>    <dbl> <chr>                
## 1 gene-L0Y14_RS00450 black_oxy     1     -3.00   NA     logFC_oxygen_limiting
## 2 gene-L0Y14_RS03075 black_oxy     1     -4.50   NA     logFC_oxygen_limiting
## 3 gene-L0Y14_RS03375 black_oxy     1     -3.61   NA     logFC_oxygen_limiting
## 4 gene-L0Y14_RS03820 black_oxy     1     -4.80   NA     logFC_oxygen_limiting
## 5 gene-L0Y14_RS04045 black_oxy     1     -3.51   NA     logFC_oxygen_limiting
## 6 gene-L0Y14_RS05880 black_oxy     2     -3.85    0.270 logFC_oxygen_limiting
ohb_DE_av2 <- merge(avg_logFC_df,all_together, by = "locusID")

boxn <- ohb_DE_av2[,c(1,3)]


oxy_purple_black <- rbind(ohb_DE_av2,ohp_DE_av2)


names(oxy_purple_black)
## [1] "locusID"             "moduleFil.x"         "n"                  
## [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
## [7] "short.names"         "moduleFil.y"         "func_category"
#write.csv(oxy_purple_black,"oxy_purple_black2.csv")
###added broad_category, removed duplicate hypo gene
oxy_purple_black2 <- read.csv("oxy_purple_black2.csv")




 ##Filter for avg_logFC > 0
oxy_up <- oxy_purple_black2[oxy_purple_black2$avg_logFC > 0, ]

# Filter for avg_logFC < 0
oxy_down <- oxy_purple_black2[oxy_purple_black2$avg_logFC < 0, ]


# Filter out 'unknown'
oxy_up_filtered <- oxy_up %>%
  filter(func_category != "unknown")

oxy_down_filtered <- oxy_down %>%
  filter(func_category != "unknown")

dim(oxy_up_filtered)
## [1] 11 10
###makes a clipping for the color (so its not all washed out)

oxy_up_filtered$clipped_avg_logFC <- pmin(pmax(oxy_up_filtered$avg_logFC, -4), 4)

oxy_down_filtered$clipped_avg_logFC <- pmin(pmax(oxy_down_filtered$avg_logFC, -4), 4)
# Combine the datasets with an identifier for each
oxy_up_filtered$set <- "Up"
oxy_down_filtered$set <- "Down"

combined_dataO <- rbind(oxy_up_filtered, oxy_down_filtered)
# Reverse the levels so that "Up" comes before "Down"
combined_dataO$set <- factor(combined_dataO$set, levels = c("Up", "Down"))

combined_dataO <- combined_dataO %>%
  mutate(adj_avg_logFC = case_when(
    set == "Up" & avg_logFC < 0 ~ 0,
    set == "Down" & avg_logFC > 0 ~ 0,
    TRUE ~ avg_logFC
  ))

  

# Plotting
p <- ggplot(combined_dataO, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
  geom_point(size = 3) +
  geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
  labs(title = "Average logFC under oxygen limiting conditions",
       x = "Adjusted Average LogFC",
       y = "Genes",
       color = "Avg LogFC Value") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_grid(cols = vars(set), rows= vars(func_category), scales = "free_y", space = "free_y")

print(p)

####trying to combine both plots so I have same persepectives for figure

names(combined_dataO)
##  [1] "X"                   "locusID"             "moduleFil.x"        
##  [4] "n"                   "avg_logFC"           "sd_logFC"           
##  [7] "treatment_logFC_ave" "short.names"         "moduleFil.y"        
## [10] "func_category"       "clipped_avg_logFC"   "set"                
## [13] "adj_avg_logFC"
combined_dataO <- combined_dataO[,c("locusID","moduleFil.x","moduleFil.y","avg_logFC","sd_logFC","treatment_logFC_ave","short.names","func_category","clipped_avg_logFC","adj_avg_logFC", "set")]

combined_dataS <- combined_dataS[,c("locusID","moduleFil.x","moduleFil.y","avg_logFC","sd_logFC","treatment_logFC_ave","short.names","func_category","clipped_avg_logFC","adj_avg_logFC", "set")]

names(combined_dataS)
##  [1] "locusID"             "moduleFil.x"         "moduleFil.y"        
##  [4] "avg_logFC"           "sd_logFC"            "treatment_logFC_ave"
##  [7] "short.names"         "func_category"       "clipped_avg_logFC"  
## [10] "adj_avg_logFC"       "set"
combinedOS <- rbind(combined_dataO,combined_dataS)


# Plotting
OS <- ggplot(combinedOS, aes(x = adj_avg_logFC, y = short.names, color = clipped_avg_logFC)) +
  geom_point(size = 3) +
  geom_errorbar(aes(xmin = adj_avg_logFC - sd_logFC, xmax = adj_avg_logFC + sd_logFC), width = 0.25) +
  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0) +
  labs(title = "Average logFC under limiting conditions",
       x = "Adjusted Average LogFC",
       y = "Genes",
       color = "Avg LogFC Value") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_grid(cols = vars(treatment_logFC_ave), rows= vars(func_category), scales = "free_y", space = "free_y")

print(OS)

###remaking figure 6 for paper such that all of the data points (n= condition tereatments)

###here are the genes in the gold module correlated to sulfide
all_together <- read.csv("wgcna_trait_module_hub_genes.csv")
#names(all_together)
all_together <- all_together[,1:4]



shg_DE_av <- shg_DE2[,c(1,44,50,53)]

head(shg_DE_av)
##              locusID     logFC      comparison      moduleFil
## 1 gene-L0Y14_RS10410 -6.607549 LS_vs_HS_noN_HO darkgoldenrod1
## 2 gene-L0Y14_RS00080 -3.742340 LS_vs_HS_noN_HO darkgoldenrod1
## 3 gene-L0Y14_RS01985 -5.245126 LS_vs_HS_noN_HO darkgoldenrod1
## 4 gene-L0Y14_RS07695  4.361391 LS_vs_HS_noN_HO darkgoldenrod1
## 5 gene-L0Y14_RS11995  2.598792 LS_vs_HS_noN_HO darkgoldenrod1
## 6 gene-L0Y14_RS11990  2.739793 LS_vs_HS_noN_HO darkgoldenrod1
#names(shg_DE_av)

###here are the genes in the teal module correlated to sulfide

names(sht_DE3)
##  [1] "X.1"                          "locusID"                     
##  [3] "geneid"                       "start.x"                     
##  [5] "stop.x"                       "gene_letters_andre"          
##  [7] "accession_andre"              "start"                       
##  [9] "stop"                         "notes"                       
## [11] "Ontology"                     "Inference"                   
## [13] "product"                      "protein_id"                  
## [15] "gene_letters.notes"           "NCBI_ID"                     
## [17] "annotations_from_andre"       "target_id"                   
## [19] "Accession"                    "functional_role"             
## [21] "sub_role"                     "sub_role2"                   
## [23] "Protein"                      "Accession...Protein"         
## [25] "seed_ortholog_evalue"         "seed_ortholog_score"         
## [27] "best_tax_level"               "Preferred_name"              
## [29] "GOs"                          "X"                           
## [31] "KEGG_ko"                      "KEGG_Pathway"                
## [33] "KEGG_Module"                  "KEGG_Reaction"               
## [35] "KEGG_rclass"                  "BRITE"                       
## [37] "KEGG_TC"                      "CAZy"                        
## [39] "BiGG_Reaction"                "tax_scope"                   
## [41] "eggNOG_OGs"                   "bestOG"                      
## [43] "COG_Functional_Category"      "eggNOG_free_text_description"
## [45] "logFC"                        "AveExpr"                     
## [47] "t"                            "P.value"                     
## [49] "adj.P.Val"                    "B"                           
## [51] "comparison"                   "Predicted_function"          
## [53] "condition"                    "moduleFil"
v <- unique(sht_DE3$locusID)
sht_DE3_av <- sht_DE3[,c(2,45,51,54)]
head(sht_DE3_av)
##              locusID     logFC      comparison moduleFil
## 1 gene-L0Y14_RS00400 -5.671978 LS_vs_HS_noN_HO      teal
## 2 gene-L0Y14_RS01755 -1.093415 LS_vs_HS_noN_HO      teal
## 3 gene-L0Y14_RS04610 -1.158590 LS_vs_HS_noN_HO      teal
## 4 gene-L0Y14_RS05075 -1.192507 LS_vs_HS_noN_HO      teal
## 5 gene-L0Y14_RS05080 -2.083575 LS_vs_HS_noN_HO      teal
## 6 gene-L0Y14_RS07060 -1.072405 LS_vs_HS_noN_HO      teal
#names(sht_DE3_av)

sulfide_gt <- rbind(sht_DE3_av,shg_DE_av)


#names(sht_DE3_av)


#names(all_together)

shortnames <- all_together[,1:2]

sulfideGT <- merge(sulfide_gt,shortnames)
list(unique(sulfideGT$short.names))
## [[1]]
##  [1] "glnL"             "torF"             "norB"             "nirS"            
##  [5] "norB1"            "norD2"            "psrA"             "fusA"            
##  [9] "nirB"             "trmD"             "YqgE/AlgH"        "ibpA"            
## [13] "pyrG"             "ftsH"             "rimP"             "nusA"            
## [17] "fusA2"            "napG"             "moaA"             "DUF4340"         
## [21] "fusA_2"           "holA"             "hupE"             "HyiA_small"      
## [25] "isp1"             "isp2"             "HyiB_large"       "hypo"            
## [29] "frdB_1"           "frdC_1"           "htpX"             "cmoB"            
## [33] "peptidase"        "leuC"             "ferritin_like_AB" "korB_2"          
## [37] "korA_2"           "rhlE"             "DUF2058"          "dnaJ"            
## [41] "recN"             "hdrB"             "korA_1"           "korB_1"          
## [45] "korC"             "hdrA"             "flxD"             "flxC"            
## [49] "flxB"             "flxA"             "aclB_full"        "aclA"            
## [53] "idh"              "acnA"             "pntA"             "pntB"            
## [57] "mdh_1"            "pfor_3"           "ispB"             "porin"           
## [61] "clpB"             "DUF4126"          "rho"              "ptsP_E1"         
## [65] "feoB"             "mcrA"             "korA_3"           "korB_3"          
## [69] "pfor_like"
desired_order2 <- c("pntB","pntA","mdh_1","korC","korB_1","korA_1","idh","hdrB", "hdrA", "flxD","flxC","flxB", "flxA","acnA","aclB_full","aclA","korB_2","korA_2","korB_3","korA_3","pfor_like","pfor_3","frdC_1" ,"frdB_1","HyiB_large","isp1","isp2","HyiA_small", "hupE","hypo","norD2","norB1" ,"norB", "nirS","napG","nirB","glnL","fusA2","mcrA","fusA_2","fusA","trmD","rimP","rho","rhlE","recN","peptidase","nusA","ibpA","htpX","holA","ftsH", "dnaJ","cmoB","clpB","pyrG","psrA","moaA","leuC","ispB","torF","ptsP_E1","porin","feoB")

filteredGT <- subset(sulfideGT, short.names %in% desired_order2)
filteredGT$short.names <- factor(filteredGT$short.names, levels = desired_order2)


filteredGT$clipped_logFC <- pmin(pmax(filteredGT$logFC, -4), 4)

try <- ggplot(data=filteredGT, aes(x= logFC, y = fct_rev(short.names))) +
  geom_boxplot()+
  geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
  labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
  theme_minimal()
  
try

table(sulfideGT$comparison,sulfideGT$short.names)
##                  
##                   aclA aclB_full acnA clpB cmoB dnaJ DUF2058 DUF4126 DUF4340
##   H2_vs_HS_N_HO      0         0    0    1    1    1       1       1       1
##   H2_vs_HS_noN_HO    1         1    1    1    1    1       0       0       0
##   LS_vs_HS_N_HO      0         0    0    1    0    0       1       1       0
##   LS_vs_HS_noN_HO    1         1    1    1    0    1       0       0       0
##   SW_vs_HS_noN_HO    1         1    1    1    1    1       1       1       1
##                  
##                   feoB ferritin_like_AB flxA flxB flxC flxD frdB_1 frdC_1 ftsH
##   H2_vs_HS_N_HO      1                1    0    0    0    0      2      1    1
##   H2_vs_HS_noN_HO    0                1    1    1    1    1      2      1    1
##   LS_vs_HS_N_HO      1                0    0    0    0    0      0      0    1
##   LS_vs_HS_noN_HO    0                0    1    1    1    1      2      1    0
##   SW_vs_HS_noN_HO    1                1    1    1    1    1      2      1    1
##                  
##                   fusA fusA_2 fusA2 glnL hdrA hdrB holA htpX hupE HyiA_small
##   H2_vs_HS_N_HO      1      1     1    0    0    0    1    1    1          1
##   H2_vs_HS_noN_HO    1      1     1    1    1    1    1    1    0          1
##   LS_vs_HS_N_HO      0      0     0    0    0    0    0    1    0          1
##   LS_vs_HS_noN_HO    1      1     1    1    1    1    0    0    0          0
##   SW_vs_HS_noN_HO    1      1     1    1    1    1    1    1    0          1
##                  
##                   HyiB_large hypo ibpA idh isp1 isp2 ispB korA_1 korA_2 korA_3
##   H2_vs_HS_N_HO            1    2    1   0    1    1    1      0      1      1
##   H2_vs_HS_noN_HO          1    2    1   1    1    1    1      1      1      1
##   LS_vs_HS_N_HO            1    1    1   0    1    1    1      0      0      1
##   LS_vs_HS_noN_HO          0    0    0   1    0    0    0      1      0      1
##   SW_vs_HS_noN_HO          1    1    1   1    1    1    1      1      1      1
##                  
##                   korB_1 korB_2 korB_3 korC leuC mcrA mdh_1 moaA napG nirB nirS
##   H2_vs_HS_N_HO        0      1      1    0    0    1     0    1    1    0    1
##   H2_vs_HS_noN_HO      1      1      1    1    1    0     1    0    1    1    1
##   LS_vs_HS_N_HO        0      0      0    0    0    0     0    0    0    0    1
##   LS_vs_HS_noN_HO      1      0      1    1    1    0     1    0    0    1    0
##   SW_vs_HS_noN_HO      1      1      1    1    1    0     1    1    1    1    1
##                  
##                   norB norB1 norD2 nusA peptidase pfor_3 pfor_like pntA pntB
##   H2_vs_HS_N_HO      1     1     1    1         0      1         1    0    0
##   H2_vs_HS_noN_HO    1     1     1    1         1      1         1    1    1
##   LS_vs_HS_N_HO      0     0     0    1         0      0         1    0    0
##   LS_vs_HS_noN_HO    0     0     1    1         1      0         0    1    1
##   SW_vs_HS_noN_HO    1     1     1    1         1      1         1    1    1
##                  
##                   porin psrA ptsP_E1 pyrG recN rhlE rho rimP torF trmD
##   H2_vs_HS_N_HO       1    1       1    1    1    1   1    1    1    1
##   H2_vs_HS_noN_HO     1    1       1    1    1    1   1    1    1    1
##   LS_vs_HS_N_HO       1    0       0    0    0    1   0    0    0    0
##   LS_vs_HS_noN_HO     1    0       1    1    1    0   0    1    1    0
##   SW_vs_HS_noN_HO     1    1       1    1    1    1   1    1    1    1
##                  
##                   YqgE/AlgH
##   H2_vs_HS_N_HO           1
##   H2_vs_HS_noN_HO         1
##   LS_vs_HS_N_HO           0
##   LS_vs_HS_noN_HO         0
##   SW_vs_HS_noN_HO         1
###### now doing this for oxygen with purple and black modules, which I named pox and box

pb_ox <- rbind(pox,box)

###changing "hypo2 to another name so it wont get filtered out
pb_ox <- pb_ox %>%
  mutate(short.names = ifelse(short.names == "hypo2", "unknown_hyd1e", short.names))



####oops this caught the hypo2 from the other module as well, so filtering that out

pb_ox <- pb_ox %>%
  filter(!grepl("gene-L0Y14_RS03075", locusID))

####getting rid of the hypotheticals


###in case I want this list later
hypos_fil <- pb_ox %>%
  filter(grepl("^hypo", short.names))


###ok, again with the filter
pb_ox_fil <- pb_ox %>%
  filter(!grepl("^hypo", short.names))

###checking if genes are overlapping (meaning found in both modules....they aren't)
#table(pb_ox_fil$moduleFil,pb_ox_fil$short.names)

#print(unique(pb_ox_fil$short.names))

desired_order3 <- c("nifJ","ppdK","malQ","GH16","acnB","fabA","unknown_hyd1e","hypE","isp2","hybO","hupE","cphA","SgpA_1","fusA2","mrcA","yidD","yidC","yebA","tsaC","rocR","rpmB","prsK","LT_domain","lolA_like","IscA","HIT","himD","gmhA","ftsE","dut","cbbQ","bamD","Prxs","Hr","fliK1","FBP")

#list(desired_order3)

####ok, some of these are unknowns that I left off of the fgure, so I am going to filter them out

pb_ox_fil2 <- subset(pb_ox_fil, short.names %in% desired_order3)
pb_ox_fil2$short.names <- factor(pb_ox_fil2$short.names, levels = desired_order3)

pb_ox_fil2$clipped_logFC <- pmin(pmax(pb_ox_fil2$logFC, -4), 4)


try2 <- ggplot(data=pb_ox_fil2, aes(x= logFC, y = fct_rev(short.names))) +
  geom_boxplot()+
  geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
  labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
  scale_x_reverse()+
  theme_minimal()
  
try2

table(pb_ox_fil2$comparison)
## 
## LO_vs_HO_H2_noN   LO_vs_HO_HS_N 
##              19              30
####ok, I have a problem, I can't get the dimensions to match for adobe illustrator, so adding dummy variables to save me time in illustrator

# Define the number of dummy entries you need
num_dummy <- 33

# Creating the dummy data frame with unique dummy names
dummy_data2 <- data.frame(
  locusID= rep("fakeid",num_dummy),
  logFC = rep(0, num_dummy), # Sets all logFC values to 0
  comparison=rep("fakecomparison",num_dummy),
  moduleFil = rep("fakemoduleFil",num_dummy),
  short.names = paste("Dummy", 1:num_dummy, sep=""),
  clipped_logFC = rep(4, num_dummy)# Creates "Dummy1", "Dummy2", ..., "Dummy33"
)


pb_ox_filDUMM <- rbind(pb_ox_fil2,dummy_data2)


try3 <- ggplot(data=pb_ox_filDUMM, aes(x= logFC, y = fct_rev(short.names))) +
  geom_boxplot()+
  geom_point(aes(color = clipped_logFC), position = position_jitter(width = 0.1,height = 0.1)) +
  scale_color_gradient2(low = "#2166ac", mid = "white", high = "#B2182B", midpoint = 0)+
  labs(title = "Boxplot with Colored Dots", x = "Category", y = "Value") +
  scale_x_reverse()+
  theme_minimal()
  
try3